From 6a796d857122f6cac2cee2f5c2dbacc1927f75af Mon Sep 17 00:00:00 2001 From: Matt Bruce Date: Sun, 10 Aug 2025 10:28:29 -0500 Subject: [PATCH] Signed-off-by: Matt Bruce --- PRD.md | 30 +++- README.md | 16 +- add_manual_video.py | 14 +- build_cache_from_raw.py | 9 +- data/config.json => config.json | 1 + data/cleanup_recent_tracking.py | 6 +- data/songlist_report.py | 14 +- example_custom_data_directory.py | 198 +++++++++++++++++++++ karaoke_downloader/cache_manager.py | 4 +- karaoke_downloader/channel_parser.py | 10 +- karaoke_downloader/cli.py | 40 +++-- karaoke_downloader/config_manager.py | 31 +++- karaoke_downloader/data_path_manager.py | 184 +++++++++++++++++++ karaoke_downloader/download_planner.py | 3 +- karaoke_downloader/downloader.py | 13 +- karaoke_downloader/manual_video_manager.py | 10 +- karaoke_downloader/server_manager.py | 14 +- karaoke_downloader/songlist_generator.py | 10 +- karaoke_downloader/songlist_manager.py | 13 +- karaoke_downloader/tracking_manager.py | 11 +- reset_and_redownload.py | 10 +- 21 files changed, 580 insertions(+), 61 deletions(-) rename data/config.json => config.json (98%) create mode 100644 example_custom_data_directory.py create mode 100644 karaoke_downloader/data_path_manager.py diff --git a/PRD.md b/PRD.md index 9f7c748..48389a6 100644 --- a/PRD.md +++ b/PRD.md @@ -152,8 +152,8 @@ KaroakeVideoDownloader/ │ ├── check_resolution.py # Resolution checker utility │ ├── resolution_cli.py # Resolution config CLI │ └── tracking_cli.py # Tracking management CLI -├── data/ # All config, tracking, cache, and songlist files -│ ├── config.json +├── config.json # Main configuration file +├── data/ # All tracking, cache, and songlist files │ ├── karaoke_tracking.json │ ├── songlist_tracking.json │ ├── channel_cache.json @@ -563,6 +563,32 @@ python3 src/tests/test_macos.py # 3. 
Install FFmpeg: brew install ffmpeg ``` +## 🔧 Recent Bug Fixes & Improvements (v3.4.7) +### **Configurable Data Directory Path** +- **Centralized Data Path Management**: New `data_path_manager.py` module provides unified data directory path management +- **Configurable Location**: Data directory path can be set in `config.json` under `folder_structure.data_dir` +- **Backward Compatibility**: Defaults to "data" directory if not configured +- **Cross-Project Integration**: Enables the karaoke downloader to be used as a component in other projects with different data directory structures +- **Updated All Modules**: All modules now use the data path manager instead of hardcoded "data/" paths +- **Utility Functions**: Provides `get_data_path()`, `get_data_dir()`, and `get_data_path_manager()` functions for easy access +- **Fixed Circular Dependency**: Moved `config.json` from `data/` to root directory to resolve chicken-and-egg problem + +### **Benefits of Configurable Data Directory** +- **Flexible Deployment**: Can be integrated into other projects with different directory structures +- **Centralized Configuration**: Single point of configuration for all data file paths +- **Maintainable Code**: Eliminates hardcoded paths throughout the codebase +- **Easy Testing**: Can use temporary directories for testing without affecting production data +- **Future-Proof**: Makes it easier to change data directory structure in the future + +### **Circular Dependency Solution** +The original implementation had a circular dependency problem: +- **Problem**: `config.json` was located in the `data/` directory +- **Issue**: To read the config file, we needed to know where the data directory is +- **Conflict**: But the data directory location is specified in the config file +- **Solution**: Moved `config.json` to the root directory as a fixed location +- **Result**: Config file is always accessible, and data directory can be configured within it +- **Backward Compatibility**: System 
still works with config files in custom data directories when explicitly specified + ## 🔧 Recent Bug Fixes & Improvements (v3.4.6) ### **Dry Run Mode** - **New `--dry-run` parameter**: Build download plan and show what would be downloaded without actually downloading anything diff --git a/README.md b/README.md index 234348f..8a0174f 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,12 @@ A Python-based cross-platform CLI tool to download karaoke videos from YouTube c ## 🏗️ Architecture The codebase has been comprehensively refactored into a modular architecture with centralized utilities for improved maintainability, error handling, and code reuse: +### **Configurable Data Directory (v3.4.7)** +- **Centralized Data Path Management**: `data_path_manager.py` provides unified data directory path management +- **Configurable Location**: Data directory path can be set in `config.json` under `folder_structure.data_dir` +- **Backward Compatibility**: Defaults to "data" directory if not configured +- **Cross-Project Integration**: Enables the karaoke downloader to be used as a component in other projects with different data directory structures + ### Core Modules: - **`downloader.py`**: Main orchestrator and CLI interface - **`video_downloader.py`**: Core video download execution and orchestration @@ -53,6 +59,9 @@ The codebase has been comprehensively refactored into a modular architecture wit - **`file_utils.py`**: Centralized file operations, filename sanitization, and file validation - **`song_validator.py`**: Centralized song validation logic for checking if songs should be downloaded +### New Utility Modules (v3.4.7): +- **`data_path_manager.py`**: Centralized data directory path management and file path resolution + ### **Unified Download Workflow (v3.4.5)** - **`execute_unified_download_workflow()`**: Centralized download execution that all modes use - **`_execute_sequential_downloads()`**: Sequential download execution using DownloadPipeline @@ -401,8 +410,8 @@ 
KaroakeVideoDownloader/ │ ├── check_resolution.py # Resolution checker utility │ ├── resolution_cli.py # Resolution config CLI │ └── tracking_cli.py # Tracking management CLI -├── data/ # All config, tracking, cache, and songlist files -│ ├── config.json +├── config.json # Main configuration file +├── data/ # All tracking, cache, and songlist files │ ├── karaoke_tracking.json │ ├── songlist_tracking.json │ ├── channel_cache.json @@ -514,8 +523,9 @@ python download_karaoke.py --generate-unmatched-report --fuzzy-match --fuzzy-thr - Removes `.info.json` and `.meta` files after download ## 🛠️ Configuration -- All options are in `data/config.json` (format, resolution, metadata, etc.) +- All options are in `config.json` (format, resolution, metadata, etc.) - You can edit this file or use CLI flags to override +- **Configurable Data Directory**: The data directory path can be configured in `config.json` under `folder_structure.data_dir` (default: "data") ## 📋 Command Reference File diff --git a/add_manual_video.py b/add_manual_video.py index a7fc164..611bea8 100644 --- a/add_manual_video.py +++ b/add_manual_video.py @@ -8,6 +8,8 @@ import re from pathlib import Path from typing import Dict, List, Optional +from karaoke_downloader.data_path_manager import get_data_path_manager + def extract_video_id(url: str) -> Optional[str]: """Extract video ID from YouTube URL.""" patterns = [ @@ -21,7 +23,9 @@ def extract_video_id(url: str) -> Optional[str]: return match.group(1) return None -def add_manual_video(title: str, url: str, manual_file: str = "data/manual_videos.json"): +def add_manual_video(title: str, url: str, manual_file: str = None): + if manual_file is None: + manual_file = str(get_data_path_manager().get_manual_videos_path()) """ Add a manual video to the collection. 
@@ -88,7 +92,9 @@ def add_manual_video(title: str, url: str, manual_file: str = "data/manual_video print(f" ID: {video_id}") return True -def list_manual_videos(manual_file: str = "data/manual_videos.json"): +def list_manual_videos(manual_file: str = None): + if manual_file is None: + manual_file = str(get_data_path_manager().get_manual_videos_path()) """List all manual videos.""" manual_path = Path(manual_file) @@ -108,7 +114,9 @@ def list_manual_videos(manual_file: str = "data/manual_videos.json"): print(f" ID: {video['id']}") print() -def remove_manual_video(video_id: str, manual_file: str = "data/manual_videos.json"): +def remove_manual_video(video_id: str, manual_file: str = None): + if manual_file is None: + manual_file = str(get_data_path_manager().get_manual_videos_path()) """Remove a manual video by ID.""" manual_path = Path(manual_file) diff --git a/build_cache_from_raw.py b/build_cache_from_raw.py index 85fb60c..bb27fd0 100644 --- a/build_cache_from_raw.py +++ b/build_cache_from_raw.py @@ -9,6 +9,8 @@ import re from datetime import datetime from pathlib import Path +from karaoke_downloader.data_path_manager import get_data_path_manager + def parse_raw_output_file(raw_file_path): """Parse the raw output file and extract valid videos.""" videos = [] @@ -73,7 +75,9 @@ def parse_raw_output_file(raw_file_path): return videos -def save_cache_file(channel_id, videos, cache_dir="data/channel_cache"): +def save_cache_file(channel_id, videos, cache_dir=None): + if cache_dir is None: + cache_dir = str(get_data_path_manager().get_channel_cache_dir()) """Save the parsed videos to a cache file.""" cache_dir = Path(cache_dir) cache_dir.mkdir(parents=True, exist_ok=True) @@ -97,7 +101,8 @@ def save_cache_file(channel_id, videos, cache_dir="data/channel_cache"): def main(): """Main function to build cache from raw output.""" - raw_file_path = Path("data/channel_cache/@VocalStarKaraoke_raw_output.txt") + data_path_manager = get_data_path_manager() + raw_file_path = 
data_path_manager.get_channel_cache_dir() / "@VocalStarKaraoke_raw_output.txt" if not raw_file_path.exists(): print(f"❌ Raw output file not found: {raw_file_path}") diff --git a/data/config.json b/config.json similarity index 98% rename from data/config.json rename to config.json index 1933a61..0f08943 100644 --- a/data/config.json +++ b/config.json @@ -26,6 +26,7 @@ "folder_structure": { "downloads_dir": "downloads", "logs_dir": "logs", + "data_dir": "data", "tracking_file": "downloaded_videos.json" }, "logging": { diff --git a/data/cleanup_recent_tracking.py b/data/cleanup_recent_tracking.py index eeac3e5..a655578 100644 --- a/data/cleanup_recent_tracking.py +++ b/data/cleanup_recent_tracking.py @@ -2,7 +2,11 @@ import json from pathlib import Path from datetime import datetime, time -def cleanup_recent_tracking(tracking_path="data/songlist_tracking.json", cutoff_time_str="11:00"): +from karaoke_downloader.data_path_manager import get_data_path_manager + +def cleanup_recent_tracking(tracking_path=None, cutoff_time_str="11:00"): + if tracking_path is None: + tracking_path = str(get_data_path_manager().get_songlist_tracking_path()) """Remove entries from songlist_tracking.json that were added after the specified time today.""" tracking_file = Path(tracking_path) if not tracking_file.exists(): diff --git a/data/songlist_report.py b/data/songlist_report.py index 3dffe1a..25e9181 100644 --- a/data/songlist_report.py +++ b/data/songlist_report.py @@ -1,11 +1,15 @@ import json from pathlib import Path +from karaoke_downloader.data_path_manager import get_data_path_manager + def normalize_title(title): normalized = title.replace("(Karaoke Version)", "").replace("(Karaoke)", "").strip() return " ".join(normalized.split()).lower() -def load_songlist(songlist_path="data/songList.json"): +def load_songlist(songlist_path=None): + if songlist_path is None: + songlist_path = str(get_data_path_manager().get_songlist_path()) songlist_file = Path(songlist_path) if not 
songlist_file.exists(): print(f"⚠️ Songlist file not found: {songlist_path}") @@ -24,14 +28,18 @@ def load_songlist(songlist_path="data/songList.json"): }) return all_songs -def load_songlist_tracking(tracking_path="data/songlist_tracking.json"): +def load_songlist_tracking(tracking_path=None): + if tracking_path is None: + tracking_path = str(get_data_path_manager().get_songlist_tracking_path()) tracking_file = Path(tracking_path) if not tracking_file.exists(): return {} with open(tracking_file, 'r', encoding='utf-8') as f: return json.load(f) -def load_server_songs(songs_path="data/songs.json"): +def load_server_songs(songs_path=None): + if songs_path is None: + songs_path = str(get_data_path_manager().get_songs_path()) """Load the list of songs already available on the server.""" songs_file = Path(songs_path) if not songs_file.exists(): diff --git a/example_custom_data_directory.py b/example_custom_data_directory.py new file mode 100644 index 0000000..22cab8c --- /dev/null +++ b/example_custom_data_directory.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python3 +""" +Example: Using Karaoke Downloader with Custom Data Directory + +This example demonstrates how to integrate the karaoke downloader into another project +with a different data directory structure. 
+""" + +import os +import tempfile +from pathlib import Path + +from karaoke_downloader.data_path_manager import get_data_path_manager +from karaoke_downloader.config_manager import get_config_manager +from karaoke_downloader.downloader import KaraokeDownloader + + +def example_custom_data_directory(): + """Example of using the karaoke downloader with a custom data directory.""" + + print("🚀 Example: Custom Data Directory Integration") + print("=" * 50) + + # Create a temporary directory to simulate a different project structure + with tempfile.TemporaryDirectory() as temp_dir: + project_root = Path(temp_dir) / "my_karaoke_project" + project_root.mkdir(exist_ok=True) + + # Set up custom data directory structure + custom_data_dir = project_root / "karaoke_data" + custom_data_dir.mkdir(exist_ok=True) + + print(f"📁 Project root: {project_root}") + print(f"📁 Custom data directory: {custom_data_dir}") + + # Create a custom config file (in the custom data directory for this example) + config_file = custom_data_dir / "config.json" + config_data = { + "folder_structure": { + "data_dir": str(custom_data_dir), + "downloads_dir": str(project_root / "downloads"), + "logs_dir": str(project_root / "logs") + }, + "download_settings": { + "preferred_resolution": "720p" + } + } + + import json + with open(config_file, 'w') as f: + json.dump(config_data, f, indent=2) + + print(f"📄 Created config file: {config_file}") + + # Example 1: Using data path manager with custom directory + print("\n📋 Example 1: Data Path Manager") + data_path_manager = get_data_path_manager(str(custom_data_dir)) + + print(f" Data directory: {data_path_manager.data_dir}") + print(f" Songlist path: {data_path_manager.get_songlist_path()}") + print(f" Channels path: {data_path_manager.get_channels_json_path()}") + + # Example 2: Using config manager with custom directory + print("\n📋 Example 2: Config Manager") + config_manager = get_config_manager(str(custom_data_dir)) + config = config_manager.get_config() + 
+ print(f" Config loaded from: {config_manager.config_file}") + print(f" Downloads directory: {config.folder_structure.downloads_dir}") + print(f" Logs directory: {config.folder_structure.logs_dir}") + print(f" Resolution: {config.download_settings.preferred_resolution}") + + # Example 3: Using downloader with custom directory + print("\n📋 Example 3: Karaoke Downloader") + try: + downloader = KaraokeDownloader() + print(f" Downloader initialized successfully") + print(f" Downloads directory: {downloader.downloads_dir}") + print(f" Logs directory: {downloader.logs_dir}") + except Exception as e: + print(f" Downloader initialization failed (expected): {e}") + + # Example 4: Creating sample data files + print("\n📋 Example 4: Sample Data Files") + + # Create a sample channels file + channels_file = data_path_manager.get_channels_json_path() + channels_data = { + "channels": [ + { + "name": "SingKingKaraoke", + "url": "https://www.youtube.com/@SingKingKaraoke/videos", + "parsing_rules": { + "format": "artist_title_separator", + "separator": " - ", + "artist_first": True + } + } + ] + } + + with open(channels_file, 'w') as f: + json.dump(channels_data, f, indent=2) + + print(f" Created channels file: {channels_file}") + + # Create a sample songlist file + songlist_file = data_path_manager.get_songlist_path() + songlist_data = [ + { + "title": "Sample Playlist", + "songs": [ + {"artist": "Artist 1", "title": "Song 1", "position": 1}, + {"artist": "Artist 2", "title": "Song 2", "position": 2} + ] + } + ] + + with open(songlist_file, 'w') as f: + json.dump(songlist_data, f, indent=2) + + print(f" Created songlist file: {songlist_file}") + + # List all files in the custom data directory + print(f"\n📋 Files in custom data directory:") + for file_path in custom_data_dir.iterdir(): + if file_path.is_file(): + print(f" - {file_path.name}") + + print(f"\n✅ Example completed successfully!") + print(f"📁 All data files are in: {custom_data_dir}") + + +def 
example_integration_pattern(): + """Example of integration pattern for other projects.""" + + print("\n🔧 Integration Pattern for Other Projects") + print("=" * 50) + + print(""" +# Integration Pattern: + +1. Set up your project structure: + my_project/ + ├── karaoke_data/ # Custom data directory + │ ├── config.json # Configuration + │ ├── channels.json # Channel definitions + │ ├── songList.json # Song lists + │ └── ... + ├── downloads/ # Downloaded videos + ├── logs/ # Log files + └── main.py # Your main application + +2. Initialize with custom data directory: + ```python + from karaoke_downloader.data_path_manager import get_data_path_manager + from karaoke_downloader.downloader import KaraokeDownloader + + # Set up custom data directory + custom_data_dir = "path/to/your/karaoke_data" + + # Get data path manager + data_path_manager = get_data_path_manager(custom_data_dir) + + # Initialize downloader (it will use the custom data directory) + downloader = KaraokeDownloader() + + # Use the downloader + downloader.download_songlist_across_channels( + channel_urls=["https://www.youtube.com/@SingKingKaraoke/videos"], + limit=5 + ) + ``` + +3. 
Configuration file (config.json in root, or karaoke_data/config.json for custom data directory): + ```json + { + "folder_structure": { + "data_dir": "path/to/your/karaoke_data", + "downloads_dir": "path/to/your/downloads", + "logs_dir": "path/to/your/logs" + }, + "download_settings": { + "preferred_resolution": "720p" + } + } + ``` +""") + + +def main(): + """Run the examples.""" + example_custom_data_directory() + example_integration_pattern() + + +if __name__ == "__main__": + main() diff --git a/karaoke_downloader/cache_manager.py b/karaoke_downloader/cache_manager.py index 621044b..550e632 100644 --- a/karaoke_downloader/cache_manager.py +++ b/karaoke_downloader/cache_manager.py @@ -9,6 +9,8 @@ import json from datetime import datetime, timedelta from pathlib import Path +from karaoke_downloader.data_path_manager import get_data_path_manager + # Constants DEFAULT_CACHE_EXPIRATION_DAYS = 1 DEFAULT_CACHE_FILENAME_LENGTH_LIMIT = 200 # Increased from 60 @@ -37,7 +39,7 @@ def get_download_plan_cache_file(mode, **kwargs): + hashlib.md5(base.encode()).hexdigest()[:8] ) - return Path(f"data/{base}.json") + return get_data_path_manager().get_path(f"{base}.json") def load_cached_plan(cache_file, max_age_days=DEFAULT_CACHE_EXPIRATION_DAYS): diff --git a/karaoke_downloader/channel_parser.py b/karaoke_downloader/channel_parser.py index b197bc3..e4c16bc 100644 --- a/karaoke_downloader/channel_parser.py +++ b/karaoke_downloader/channel_parser.py @@ -11,11 +11,15 @@ import re from typing import Dict, List, Optional, Tuple, Any from pathlib import Path +from karaoke_downloader.data_path_manager import get_data_path_manager + class ChannelParser: """Handles channel-specific parsing of video titles to extract artist and title.""" - def __init__(self, channels_file: str = "data/channels.json"): + def __init__(self, channels_file: str = None): + if channels_file is None: + channels_file = str(get_data_path_manager().get_channels_json_path()) """Initialize the parser with channel 
configuration.""" self.channels_file = Path(channels_file) self.channels_config = self._load_channels_config() @@ -238,7 +242,9 @@ class ChannelParser: # Convenience function for backward compatibility -def extract_artist_title(video_title: str, channel_name: str, channels_file: str = "data/channels.json") -> Tuple[str, str]: +def extract_artist_title(video_title: str, channel_name: str, channels_file: str = None) -> Tuple[str, str]: + if channels_file is None: + channels_file = str(get_data_path_manager().get_channels_json_path()) """ Convenience function to extract artist and title from a video title. diff --git a/karaoke_downloader/cli.py b/karaoke_downloader/cli.py index 98a5b82..1352040 100644 --- a/karaoke_downloader/cli.py +++ b/karaoke_downloader/cli.py @@ -12,6 +12,7 @@ from typing import List from karaoke_downloader.channel_parser import ChannelParser from karaoke_downloader.config_manager import AppConfig +from karaoke_downloader.data_path_manager import get_data_path_manager from karaoke_downloader.downloader import KaraokeDownloader # Constants @@ -19,16 +20,19 @@ DEFAULT_LATEST_PER_CHANNEL_LIMIT = 10 DEFAULT_FUZZY_THRESHOLD = 85 -def load_channels_from_json(channels_file: str = "data/channels.json") -> List[str]: +def load_channels_from_json(channels_file: str = None) -> List[str]: """ Load channel URLs from the new JSON format. 
Args: - channels_file: Path to the channels.json file + channels_file: Path to the channels.json file (if None, uses default from config) Returns: List of channel URLs """ + if channels_file is None: + channels_file = str(get_data_path_manager().get_channels_json_path()) + try: parser = ChannelParser(channels_file) channels = parser.channels_config.get("channels", []) @@ -38,16 +42,19 @@ def load_channels_from_json(channels_file: str = "data/channels.json") -> List[s return [] -def load_channels_from_text(channels_file: str = "data/channels.txt") -> List[str]: +def load_channels_from_text(channels_file: str = None) -> List[str]: """ Load channel URLs from the old text format (for backward compatibility). Args: - channels_file: Path to the channels.txt file + channels_file: Path to the channels.txt file (if None, uses default from config) Returns: List of channel URLs """ + if channels_file is None: + channels_file = str(get_data_path_manager().get_channels_txt_path()) + try: with open(channels_file, "r", encoding="utf-8") as f: return [ @@ -64,10 +71,11 @@ def load_channels(channel_file: str = None) -> List[str]: """Load channel URLs from file.""" if channel_file is None: # Try JSON first, then fall back to text - if os.path.exists("data/channels.json"): - return load_channels_from_json("data/channels.json") - elif os.path.exists("data/channels.txt"): - return load_channels_from_text("data/channels.txt") + data_path_manager = get_data_path_manager() + if data_path_manager.file_exists("channels.json"): + return load_channels_from_json() + elif data_path_manager.file_exists("channels.txt"): + return load_channels_from_text() else: return [] else: @@ -176,7 +184,7 @@ Examples: parser.add_argument( "--songlist-priority", action="store_true", - help="Prioritize downloads based on data/songList.json (default: enabled)", + help="Prioritize downloads based on songList.json in the data directory (default: enabled)", ) parser.add_argument( "--no-songlist-priority", @@ -218,7 
+226,7 @@ Examples: parser.add_argument( "--songlist-file", metavar="FILE_PATH", - help="Custom songlist file path to use with --songlist-focus (default: data/songList.json)", + help="Custom songlist file path to use with --songlist-focus (default: songList.json in the data directory)", ) parser.add_argument( "--force", @@ -299,7 +307,7 @@ Examples: parser.add_argument( "--manual", action="store_true", - help="Download from manual videos collection (data/manual_videos.json)", + help="Download from manual videos collection (manual_videos.json in the data directory)", ) parser.add_argument( "--channel-focus", @@ -421,7 +429,7 @@ Examples: ) else: print( - "❌ No URL, --file, or channel configuration found. Please provide a channel URL or create data/channels.json." + "❌ No URL, --file, or channel configuration found. Please provide a channel URL or create channels.json in the data directory." ) sys.exit(1) # --- END NEW --- @@ -617,7 +625,7 @@ Examples: channel_urls = load_channels(args.file) if not channel_urls: print(f"❌ No channels found in configuration") - print("Please provide a channel URL or create data/channels.json") + print("Please provide a channel URL or create channels.json in the data directory") sys.exit(1) limit = args.limit if args.limit else DEFAULT_LATEST_PER_CHANNEL_LIMIT force_refresh_download_plan = ( @@ -649,7 +657,7 @@ Examples: # Load songlist based on focus mode if args.songlist_focus: # Load focused playlists - songlist_file_path = args.songlist_file if args.songlist_file else "data/songList.json" + songlist_file_path = args.songlist_file if args.songlist_file else str(get_data_path_manager().get_songlist_path()) songlist_file = Path(songlist_file_path) if not songlist_file.exists(): print(f"⚠️ Songlist file not found: {songlist_file_path}") @@ -697,12 +705,12 @@ Examples: songlist = [] else: # Load all songs from songlist - songlist_path = args.songlist_file if args.songlist_file else "data/songList.json" + songlist_path = 
args.songlist_file if args.songlist_file else str(get_data_path_manager().get_songlist_path()) songlist = load_songlist(songlist_path) if songlist: # Load channel URLs - channel_file = args.file if args.file else "data/channels.txt" + channel_file = args.file if args.file else str(get_data_path_manager().get_channels_txt_path()) if os.path.exists(channel_file): with open(channel_file, "r", encoding='utf-8') as f: channel_urls = [ diff --git a/karaoke_downloader/config_manager.py b/karaoke_downloader/config_manager.py index 15beedb..23a6e20 100644 --- a/karaoke_downloader/config_manager.py +++ b/karaoke_downloader/config_manager.py @@ -36,6 +36,7 @@ DEFAULT_CONFIG = { "folder_structure": { "downloads_dir": "downloads", "logs_dir": "logs", + "data_dir": "data", "tracking_file": "data/karaoke_tracking.json", }, "logging": { @@ -135,6 +136,7 @@ class FolderStructure: downloads_dir: str = "downloads" logs_dir: str = "logs" + data_dir: str = "data" tracking_file: str = "data/karaoke_tracking.json" @@ -165,14 +167,21 @@ class ConfigManager: Manages application configuration with loading, validation, and caching. """ - def __init__(self, config_file: Union[str, Path] = "data/config.json"): + def __init__(self, config_file: Union[str, Path] = "config.json", data_dir: Optional[str] = None): """ Initialize the configuration manager. 
Args: config_file: Path to the configuration file + data_dir: Optional custom data directory path """ - self.config_file = Path(config_file) + # If config_file is relative and data_dir is provided, make it relative to data_dir + if data_dir and not Path(config_file).is_absolute(): + self.config_file = Path(data_dir) / config_file + else: + self.config_file = Path(config_file) + + self._data_dir = data_dir self._config: Optional[AppConfig] = None self._last_modified: Optional[datetime] = None @@ -333,27 +342,35 @@ class ConfigManager: _config_manager: Optional[ConfigManager] = None -def get_config_manager() -> ConfigManager: +def get_config_manager(config_file: Optional[Union[str, Path]] = None, data_dir: Optional[str] = None) -> ConfigManager: """ Get the global configuration manager instance. + Args: + config_file: Optional path to config file (default: "config.json" in root) + data_dir: Optional custom data directory path + Returns: ConfigManager instance """ global _config_manager - if _config_manager is None: - _config_manager = ConfigManager() + if _config_manager is None or config_file is not None or data_dir is not None: + if config_file is None: + config_file = "config.json" + _config_manager = ConfigManager(config_file, data_dir) return _config_manager -def load_config(force_reload: bool = False) -> AppConfig: +def load_config(force_reload: bool = False, config_file: Optional[Union[str, Path]] = None, data_dir: Optional[str] = None) -> AppConfig: """ Load configuration using the global manager. 
"""
Data path management utilities for the karaoke downloader.
Provides centralized data directory path management and file path resolution.
"""

import hashlib
import os
from pathlib import Path
from typing import List, Optional

# Directory used when neither the caller nor config.json names a data directory.
_DEFAULT_DATA_DIR = "data"


class DataPathManager:
    """
    Manages data directory paths and provides utilities for resolving file paths
    relative to the configured data directory.

    Resolution order for the directory (evaluated on every ``data_dir`` access):
    an explicit ``data_dir`` given to the constructor, then
    ``folder_structure.data_dir`` from the loaded config, then ``"data"``.
    """

    def __init__(self, data_dir: Optional[str] = None):
        """
        Initialize the data path manager.

        Args:
            data_dir: Optional custom data directory path. If None (or empty),
                the directory is taken from the config.
        """
        # Deferred import: keeps this module importable without the config layer
        # and guards against an import cycle should config_manager ever need
        # data paths at import time.
        from .config_manager import get_config_manager

        self._data_dir = data_dir

        if data_dir:
            # A custom data directory carries its own config.json.
            config_file = Path(data_dir) / "config.json"
            self._config_manager = get_config_manager(str(config_file))
        else:
            # Otherwise, use the default config.json in the root directory.
            self._config_manager = get_config_manager()

    @property
    def data_dir(self) -> Path:
        """
        Get the configured data directory path.

        Returns:
            Path to the data directory. Falls back to "data" when the config
            has no usable ``folder_structure.data_dir`` (missing, None or empty).
        """
        if self._data_dir:
            return Path(self._data_dir)

        config = self._config_manager.get_config()
        folder_structure = getattr(config, "folder_structure", None)
        configured = getattr(folder_structure, "data_dir", None)
        # `or` also covers an explicit null/empty value, which Path() would
        # otherwise reject (None) or silently turn into the CWD ("").
        return Path(configured or _DEFAULT_DATA_DIR)

    def get_path(self, filename: str) -> Path:
        """
        Get the full path to a file in the data directory.

        Args:
            filename: Name of the file (e.g., 'songs.json', 'channels.json')

        Returns:
            Full path to the file
        """
        return self.data_dir / filename

    def get_channels_json_path(self) -> Path:
        """Get path to channels.json file."""
        return self.get_path("channels.json")

    def get_channels_txt_path(self) -> Path:
        """Get path to channels.txt file."""
        return self.get_path("channels.txt")

    def get_songlist_path(self) -> Path:
        """Get path to songList.json file."""
        return self.get_path("songList.json")

    def get_songlist_tracking_path(self) -> Path:
        """Get path to songlist_tracking.json file."""
        return self.get_path("songlist_tracking.json")

    def get_karaoke_tracking_path(self) -> Path:
        """Get path to karaoke_tracking.json file."""
        return self.get_path("karaoke_tracking.json")

    def get_server_duplicates_tracking_path(self) -> Path:
        """Get path to server_duplicates_tracking.json file."""
        return self.get_path("server_duplicates_tracking.json")

    def get_manual_videos_path(self) -> Path:
        """Get path to manual_videos.json file."""
        return self.get_path("manual_videos.json")

    def get_songs_path(self) -> Path:
        """Get path to songs.json file."""
        return self.get_path("songs.json")

    def get_channel_cache_dir(self) -> Path:
        """Get path to channel_cache directory."""
        return self.get_path("channel_cache")

    def get_channel_cache_path(self, channel_id: str) -> Path:
        """Get path to a specific channel cache file."""
        return self.get_channel_cache_dir() / f"{channel_id}.json"

    def get_download_plan_cache_path(self, plan_name: str, **kwargs) -> Path:
        """
        Get path to download plan cache file.

        Args:
            plan_name: Base name of the plan.
            **kwargs: Plan parameters; when present, an 8-character digest of
                the sorted items is appended so each parameter set gets its
                own cache file regardless of keyword order.

        Returns:
            Path of the form ``plan_latest_per_channel_<plan_name>[_<hash>].json``
            inside the data directory.
        """
        if kwargs:
            kwargs_str = str(sorted(kwargs.items()))
            # md5 is used purely as a short, stable fingerprint, not for security.
            hash_suffix = hashlib.md5(kwargs_str.encode()).hexdigest()[:8]
            plan_name = f"{plan_name}_{hash_suffix}"
        return self.get_path(f"plan_latest_per_channel_{plan_name}.json")

    def get_unmatched_report_path(self, timestamp: Optional[str] = None) -> Path:
        """Get path to unmatched songs report file (timestamped when given)."""
        if timestamp:
            return self.get_path(f"unmatched_songs_report_{timestamp}.json")
        return self.get_path("unmatched_songs_report.json")

    def ensure_data_dir_exists(self) -> None:
        """Ensure the data directory exists, creating parents as needed."""
        self.data_dir.mkdir(parents=True, exist_ok=True)

    def list_data_files(self) -> List[str]:
        """
        List all files in the data directory.

        Returns:
            Sorted names of the regular files directly inside the data
            directory; an empty list when the directory is missing or the
            path is not a directory.
        """
        directory = self.data_dir
        # is_dir() rather than exists(): iterdir() raises on a regular file.
        if not directory.is_dir():
            return []
        return sorted(entry.name for entry in directory.iterdir() if entry.is_file())

    def file_exists(self, filename: str) -> bool:
        """Check if a file exists in the data directory."""
        return self.get_path(filename).exists()


# Global data path manager instance
_data_path_manager: Optional[DataPathManager] = None


def get_data_path_manager(data_dir: Optional[str] = None) -> DataPathManager:
    """
    Get the global data path manager instance.

    Passing ``data_dir`` always builds a new manager and installs it as the
    global instance, so later calls without arguments use that directory too.

    Args:
        data_dir: Optional custom data directory path

    Returns:
        DataPathManager instance
    """
    global _data_path_manager
    if _data_path_manager is None or data_dir is not None:
        _data_path_manager = DataPathManager(data_dir)
    return _data_path_manager


def get_data_path(filename: str, data_dir: Optional[str] = None) -> Path:
    """
    Get the full path to a file in the data directory.

    Args:
        filename: Name of the file
        data_dir: Optional custom data directory path

    Returns:
        Full path to the file
    """
    return get_data_path_manager(data_dir).get_path(filename)


def get_data_dir(data_dir: Optional[str] = None) -> Path:
    """
    Get the configured data directory path.

    Args:
        data_dir: Optional custom data directory path

    Returns:
        Path to the data directory
    """
    return get_data_path_manager(data_dir).data_dir
cache_dir = data_path_manager.get_channel_cache_dir() self.tracker = TrackingManager( tracking_file=tracking_file, cache_dir=cache_dir ) @@ -123,7 +125,7 @@ class KaraokeDownloader: self.dry_run = False self.download_limit = None self.force_download = False - self.songlist_file_path = "data/songList.json" # Default songlist file path + self.songlist_file_path = str(get_data_path_manager().get_songlist_path()) # Default songlist file path def _load_config(self): """Load configuration using the config manager.""" @@ -1179,8 +1181,9 @@ def reset_songlist_all(): from pathlib import Path # Load songlist tracking - songlist_tracking_file = Path("data/songlist_tracking.json") - karaoke_tracking_file = Path("data/karaoke_tracking.json") + data_path_manager = get_data_path_manager() + songlist_tracking_file = data_path_manager.get_songlist_tracking_path() + karaoke_tracking_file = data_path_manager.get_karaoke_tracking_path() if songlist_tracking_file.exists(): with open(songlist_tracking_file, "r", encoding="utf-8") as f: tracking = json.load(f) diff --git a/karaoke_downloader/manual_video_manager.py b/karaoke_downloader/manual_video_manager.py index 7e71615..5d7a2de 100644 --- a/karaoke_downloader/manual_video_manager.py +++ b/karaoke_downloader/manual_video_manager.py @@ -6,7 +6,11 @@ import json from pathlib import Path from typing import Dict, List, Optional, Any -def load_manual_videos(manual_file: str = "data/manual_videos.json") -> List[Dict[str, Any]]: +from karaoke_downloader.data_path_manager import get_data_path_manager + +def load_manual_videos(manual_file: str = None) -> List[Dict[str, Any]]: + if manual_file is None: + manual_file = str(get_data_path_manager().get_manual_videos_path()) """ Load manual videos from the JSON file. 
@@ -34,7 +38,9 @@ def load_manual_videos(manual_file: str = "data/manual_videos.json") -> List[Dic print(f"❌ Error loading manual videos: {e}") return [] -def get_manual_videos_for_channel(channel_name: str, manual_file: str = "data/manual_videos.json") -> List[Dict[str, Any]]: +def get_manual_videos_for_channel(channel_name: str, manual_file: str = None) -> List[Dict[str, Any]]: + if manual_file is None: + manual_file = str(get_data_path_manager().get_manual_videos_path()) """ Get manual videos for a specific channel. diff --git a/karaoke_downloader/server_manager.py b/karaoke_downloader/server_manager.py index 491880b..3fad88a 100644 --- a/karaoke_downloader/server_manager.py +++ b/karaoke_downloader/server_manager.py @@ -7,8 +7,12 @@ import json from datetime import datetime from pathlib import Path +from karaoke_downloader.data_path_manager import get_data_path_manager -def load_server_songs(songs_path="data/songs.json"): + +def load_server_songs(songs_path=None): + if songs_path is None: + songs_path = str(get_data_path_manager().get_songs_path()) """Load the list of songs already available on the server with format information.""" songs_file = Path(songs_path) if not songs_file.exists(): @@ -59,8 +63,10 @@ def should_skip_server_song(server_songs, artist, title): def load_server_duplicates_tracking( - tracking_path="data/server_duplicates_tracking.json", + tracking_path=None, ): + if tracking_path is None: + tracking_path = str(get_data_path_manager().get_server_duplicates_tracking_path()) """Load the tracking of songs found to be duplicates on the server.""" tracking_file = Path(tracking_path) if not tracking_file.exists(): @@ -74,8 +80,10 @@ def load_server_duplicates_tracking( def save_server_duplicates_tracking( - tracking, tracking_path="data/server_duplicates_tracking.json" + tracking, tracking_path=None ): + if tracking_path is None: + tracking_path = str(get_data_path_manager().get_server_duplicates_tracking_path()) """Save the tracking of songs found 
to be duplicates on the server.""" try: with open(tracking_path, "w", encoding="utf-8") as f: diff --git a/karaoke_downloader/songlist_generator.py b/karaoke_downloader/songlist_generator.py index 21191ce..c5ff224 100644 --- a/karaoke_downloader/songlist_generator.py +++ b/karaoke_downloader/songlist_generator.py @@ -4,11 +4,15 @@ from pathlib import Path from typing import List, Dict, Any, Optional from mutagen.mp4 import MP4 +from karaoke_downloader.data_path_manager import get_data_path_manager + class SongListGenerator: """Utility class for generating song lists from MP4 files with ID3 tags.""" - def __init__(self, songlist_path: str = "data/songList.json"): + def __init__(self, songlist_path: str = None): + if songlist_path is None: + songlist_path = str(get_data_path_manager().get_songlist_path()) self.songlist_path = Path(songlist_path) self.songlist_path.parent.mkdir(parents=True, exist_ok=True) @@ -239,8 +243,8 @@ Examples: parser.add_argument( "--songlist-path", - default="data/songList.json", - help="Path to the song list JSON file (default: data/songList.json)" + default=None, + help="Path to the song list JSON file (default: songList.json in the data directory)" ) args = parser.parse_args() diff --git a/karaoke_downloader/songlist_manager.py b/karaoke_downloader/songlist_manager.py index 3068958..0b35829 100644 --- a/karaoke_downloader/songlist_manager.py +++ b/karaoke_downloader/songlist_manager.py @@ -7,6 +7,7 @@ import json from datetime import datetime from pathlib import Path +from karaoke_downloader.data_path_manager import get_data_path_manager from karaoke_downloader.server_manager import ( check_and_mark_server_duplicate, is_song_marked_as_server_duplicate, @@ -16,7 +17,9 @@ from karaoke_downloader.server_manager import ( ) -def load_songlist(songlist_path="data/songList.json"): +def load_songlist(songlist_path=None): + if songlist_path is None: + songlist_path = str(get_data_path_manager().get_songlist_path()) songlist_file = 
Path(songlist_path) if not songlist_file.exists(): print(f"⚠️ Songlist file not found: {songlist_path}") @@ -55,7 +58,9 @@ def normalize_title(title): return " ".join(normalized.split()).lower() -def load_songlist_tracking(tracking_path="data/songlist_tracking.json"): +def load_songlist_tracking(tracking_path=None): + if tracking_path is None: + tracking_path = str(get_data_path_manager().get_songlist_tracking_path()) tracking_file = Path(tracking_path) if not tracking_file.exists(): return {} @@ -67,7 +72,9 @@ def load_songlist_tracking(tracking_path="data/songlist_tracking.json"): return {} -def save_songlist_tracking(tracking, tracking_path="data/songlist_tracking.json"): +def save_songlist_tracking(tracking, tracking_path=None): + if tracking_path is None: + tracking_path = str(get_data_path_manager().get_songlist_tracking_path()) try: with open(tracking_path, "w", encoding="utf-8") as f: json.dump(tracking, f, indent=2, ensure_ascii=False) diff --git a/karaoke_downloader/tracking_manager.py b/karaoke_downloader/tracking_manager.py index 175e3f8..38a14e9 100644 --- a/karaoke_downloader/tracking_manager.py +++ b/karaoke_downloader/tracking_manager.py @@ -6,6 +6,8 @@ from enum import Enum from pathlib import Path from typing import Any, Dict, List, Optional, Tuple +from karaoke_downloader.data_path_manager import get_data_path_manager + class SongStatus(str, Enum): NOT_DOWNLOADED = "NOT_DOWNLOADED" DOWNLOADING = "DOWNLOADING" @@ -25,9 +27,14 @@ class FormatType(str, Enum): class TrackingManager: def __init__( self, - tracking_file="data/karaoke_tracking.json", - cache_dir="data/channel_cache", + tracking_file=None, + cache_dir=None, ): + if tracking_file is None: + tracking_file = str(get_data_path_manager().get_karaoke_tracking_path()) + if cache_dir is None: + cache_dir = str(get_data_path_manager().get_channel_cache_dir()) + self.tracking_file = Path(tracking_file) self.cache_dir = Path(cache_dir) diff --git a/reset_and_redownload.py b/reset_and_redownload.py 
index ec53515..e6c3e6f 100644 --- a/reset_and_redownload.py +++ b/reset_and_redownload.py @@ -14,8 +14,12 @@ import shutil from pathlib import Path from typing import List, Dict, Any +from karaoke_downloader.data_path_manager import get_data_path_manager -def reset_karaoke_tracking(tracking_file: str = "data/karaoke_tracking.json") -> None: + +def reset_karaoke_tracking(tracking_file: str = None) -> None: + if tracking_file is None: + tracking_file = str(get_data_path_manager().get_karaoke_tracking_path()) """Reset the karaoke tracking file to empty state.""" print(f"Resetting {tracking_file}...") @@ -52,7 +56,9 @@ def delete_downloaded_files(downloads_dir: str = "downloads") -> None: print(f"❌ Error deleting {downloads_dir}: {e}") -def show_download_stats(tracking_file: str = "data/karaoke_tracking.json") -> None: +def show_download_stats(tracking_file: str = None) -> None: + if tracking_file is None: + tracking_file = str(get_data_path_manager().get_karaoke_tracking_path()) """Show statistics about current downloads.""" if not os.path.exists(tracking_file): print("No tracking file found.")