diff --git a/PRD.md b/PRD.md index 48389a6..42c1ca7 100644 --- a/PRD.md +++ b/PRD.md @@ -152,7 +152,8 @@ KaroakeVideoDownloader/ │ ├── check_resolution.py # Resolution checker utility │ ├── resolution_cli.py # Resolution config CLI │ └── tracking_cli.py # Tracking management CLI -├── config.json # Main configuration file +├── config/ # Configuration files +│ └── config.json # Main configuration file ├── data/ # All tracking, cache, and songlist files │ ├── karaoke_tracking.json │ ├── songlist_tracking.json @@ -161,6 +162,17 @@ KaroakeVideoDownloader/ │ ├── channels.txt # Legacy channel list (backward compatibility) │ ├── manual_videos.json # Manual video collection │ └── songList.json +├── utilities/ # Utility scripts and tools +│ ├── add_manual_video.py # Manual video management +│ ├── build_cache_from_raw.py # Cache building utility +│ ├── cleanup_duplicate_files.py # File cleanup utilities +│ ├── cleanup_recent_tracking.py # Tracking cleanup utilities +│ ├── deduplicate_songlist_tracking.py # Data deduplication +│ ├── fix_artist_name_format.py # Data cleanup utilities +│ ├── fix_artist_name_format_simple.py +│ ├── fix_code_quality.py # Development tools +│ ├── reset_and_redownload.py # Maintenance utilities +│ └── songlist_report.py # Reporting utilities ├── downloads/ # All video output │ └── [ChannelName]/ # Per-channel folders ├── logs/ # Download logs @@ -566,7 +578,7 @@ python3 src/tests/test_macos.py ## 🔧 Recent Bug Fixes & Improvements (v3.4.7) ### **Configurable Data Directory Path** - **Centralized Data Path Management**: New `data_path_manager.py` module provides unified data directory path management -- **Configurable Location**: Data directory path can be set in `config.json` under `folder_structure.data_dir` +- **Configurable Location**: Data directory path can be set in `config/config.json` under `folder_structure.data_dir` - **Backward Compatibility**: Defaults to "data" directory if not configured - **Cross-Project Integration**: Enables the karaoke downloader to be used as a component in other projects with different data directory structures - **Updated All Modules**: All modules now use the data path manager instead of hardcoded "data/" paths @@ -585,8 +597,8 @@ The original implementation had a circular dependency problem: - **Problem**: `config.json` was located in the `data/` directory - **Issue**: To read the config file, we needed to know where the data directory is - **Conflict**: But the data directory location is specified in the config file -- **Solution**: Moved `config.json` to the root directory as a fixed location -- **Result**: Config file is always accessible, and data directory can be configured within it +- **Solution**: Moved `config.json` to the `config/` directory as a fixed location +- **Result**: Config file is always accessible in a dedicated config directory, and data directory can be configured within it - **Backward Compatibility**: System still works with config files in custom data directories when explicitly specified ## 🔧 Recent Bug Fixes & Improvements (v3.4.6) diff --git a/README.md b/README.md index 8a0174f..5568d47 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ The codebase has been comprehensively refactored into a modular architecture wit ### **Configurable Data Directory (v3.4.7)** - **Centralized Data Path Management**: `data_path_manager.py` provides unified data directory path management -- **Configurable Location**: Data directory path can be set in `config.json` under `folder_structure.data_dir` +- **Configurable Location**: Data directory path can be set in `config/config.json` under `folder_structure.data_dir` - **Backward Compatibility**: Defaults to "data" directory if not configured - **Cross-Project Integration**: Enables the karaoke downloader to be used as a component in other projects with different data directory structures @@ -410,13 +410,25 @@ KaroakeVideoDownloader/ │ ├── check_resolution.py # Resolution checker utility │ ├── resolution_cli.py # Resolution config CLI │ └── tracking_cli.py # Tracking management CLI -├── config.json # Main configuration file +├── config/ # Configuration files +│ └── config.json # Main configuration file ├── data/ # All tracking, cache, and songlist files │ ├── karaoke_tracking.json │ ├── songlist_tracking.json │ ├── channel_cache.json │ ├── channels.txt │ └── songList.json +├── utilities/ # Utility scripts and tools +│ ├── add_manual_video.py # Manual video management +│ ├── build_cache_from_raw.py # Cache building utility +│ ├── cleanup_duplicate_files.py # File cleanup utilities +│ ├── cleanup_recent_tracking.py # Tracking cleanup utilities +│ ├── deduplicate_songlist_tracking.py # Data deduplication +│ ├── fix_artist_name_format.py # Data cleanup utilities +│ ├── fix_artist_name_format_simple.py +│ ├── fix_code_quality.py # Development tools +│ ├── reset_and_redownload.py # Maintenance utilities +│ └── songlist_report.py # Reporting utilities ├── downloads/ # All video output │ └── [ChannelName]/ # Per-channel folders ├── logs/ # Download logs @@ -523,9 +535,9 @@ python download_karaoke.py --generate-unmatched-report --fuzzy-match --fuzzy-thr - Removes `.info.json` and `.meta` files after download ## 🛠️ Configuration -- All options are in `config.json` (format, resolution, metadata, etc.) +- All options are in `config/config.json` (format, resolution, metadata, etc.) - You can edit this file or use CLI flags to override -- **Configurable Data Directory**: The data directory path can be configured in `config.json` under `folder_structure.data_dir` (default: "data") +- **Configurable Data Directory**: The data directory path can be configured in `config/config.json` under `folder_structure.data_dir` (default: "data") ## 📋 Command Reference File diff --git a/commands.txt b/commands.txt index d45aaf0..c543bca 100644 --- a/commands.txt +++ b/commands.txt @@ -37,13 +37,13 @@ python download_karaoke.py --manual --songlist-only --limit 10 python download_karaoke.py --manual --force --limit 5 # Add a video to manual collection (interactive) -python add_manual_video.py add "Artist - Song Title (Karaoke Version)" "https://www.youtube.com/watch?v=VIDEO_ID" +python utilities/add_manual_video.py add "Artist - Song Title (Karaoke Version)" "https://www.youtube.com/watch?v=VIDEO_ID" # List all manual videos -python add_manual_video.py list +python utilities/add_manual_video.py list # Remove a video from manual collection -python add_manual_video.py remove "Artist - Song Title (Karaoke Version)" +python utilities/add_manual_video.py remove "Artist - Song Title (Karaoke Version)" ## 🎬 ALL VIDEOS DOWNLOAD MODE (v3.4.4) diff --git a/config.json b/config/config.json similarity index 100% rename from config.json rename to config/config.json diff --git a/data/channels.txt b/data/channels.txt deleted file mode 100644 index d1cf666..0000000 --- a/data/channels.txt +++ /dev/null @@ -1,6 +0,0 @@ -https://www.youtube.com/@SingKingKaraoke/videos -https://www.youtube.com/@KaraokeOnVEVO/videos -https://www.youtube.com/@StingrayKaraoke/videos -https://www.youtube.com/@sing2karaoke/videos -https://www.youtube.com/@ZoomKaraokeOfficial/videos -https://www.youtube.com/@VocalStarKaraoke/videos \ No newline at end of file diff --git a/data/plan_latest_per_channel_channels8_hash903857ec_force_downloadFalse_limit_per_channel2.json b/data/plan_latest_per_channel_channels8_hash903857ec_force_downloadFalse_limit_per_channel2.json deleted file mode 100644 index 6516751..0000000 --- a/data/plan_latest_per_channel_channels8_hash903857ec_force_downloadFalse_limit_per_channel2.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "timestamp": "2025-08-05T16:01:09.018725", - "download_plan": [ - { - "video_id": "oHV8Iw0R4BY", - "artist": "Shaboozey, Jelly Roll", - "title": "Amen", - "filename": "Shaboozey, Jelly Roll - Amen.mp4", - "channel_name": "@SingKingKaraoke", - "video_title": "Shaboozey, Jelly Roll - Amen (Karaoke Version)", - "force_download": false - }, - { - "video_id": "Jm3a-VAomH0", - "artist": "Pet Shop Boys", - "title": "Domino Dancing", - "filename": "Pet Shop Boys - Domino Dancing.mp4", - "channel_name": "@KaraokeOnVEVO", - "video_title": "Pet Shop Boys - Domino Dancing (Karaoke)", - "force_download": false - }, - { - "video_id": "6Vb0igX0-Ss", - "artist": "Chappell Roan", - "title": "The Giver", - "filename": "Chappell Roan - The Giver.mp4", - "channel_name": "@StingrayKaraoke", - "video_title": "Chappell Roan - The Giver (Karaoke Version)", - "force_download": false - }, - { - "video_id": "b1k2_B9oCr4", - "artist": "James Arthur", - "title": "Train Wreck", - "filename": "James Arthur - Train Wreck.mp4", - "channel_name": "@sing2karaoke", - "video_title": "James Arthur Train Wreck", - "force_download": false - }, - { - "video_id": "cg10FeEYSSQ", - "artist": "Caesars", - "title": "Jerk It Out", - "filename": "Caesars - Jerk It Out.mp4", - "channel_name": "@ZoomKaraokeOfficial", - "video_title": "Caesars - Jerk It Out - Karaoke Version from Zoom Karaoke", - "force_download": false - }, - { - "video_id": "m51bbu2ghp4", - "artist": "Jin", - "title": "Don't Say You Love Me", - "filename": "Jin - Dont Say You Love Me.mp4", - "channel_name": "@VocalStarKaraoke", - "video_title": "Don't Say You Love Me - Jin KARAOKE With Vocal Guide", - "force_download": false - }, - { - "video_id": "qegLWI99Wg0", - "artist": "Ed Sheeran & Beyoncé", - "title": "Perfect Duet", - "filename": "Ed Sheeran & Beyoncé - Perfect Duet.mp4", - "channel_name": "Unknown", - "video_title": "Ed Sheeran & Beyoncé - Perfect Duet", - "force_download": false - }, - { - "video_id": "ZbWHuncTgsM", - "artist": "Sia", - "title": "Snowman | Karaoke (instrumental)", - "filename": "Sia - Snowman Karaoke (instrumental).mp4", - "channel_name": "@LetsSingKaraoke", - "video_title": "Sia - Snowman | Karaoke (instrumental)", - "force_download": false - } - ], - "unmatched": [] -} \ No newline at end of file diff --git a/data/unmatched_songs_report_20250805_160748.json b/data/unmatched_songs_report_20250805_160748.json deleted file mode 100644 index b4f05d6..0000000 --- a/data/unmatched_songs_report_20250805_160748.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "generated_at": "2025-08-05T16:07:48.031279", - "total_unmatched": 1, - "unmatched_songs": [ - { - "artist": "SZA", - "title": "30 For 30", - "position": 3, - "search_key": "sza_30 for 30" - } - ] -} \ No newline at end of file diff --git a/example_custom_data_directory.py b/example_custom_data_directory.py deleted file mode 100644 index 22cab8c..0000000 --- a/example_custom_data_directory.py +++ /dev/null @@ -1,198 +0,0 @@ -#!/usr/bin/env python3 -""" -Example: Using Karaoke Downloader with Custom Data Directory - -This example demonstrates how to integrate the karaoke downloader into another project -with a different data directory structure. -""" - -import os -import tempfile -from pathlib import Path - -from karaoke_downloader.data_path_manager import get_data_path_manager -from karaoke_downloader.config_manager import get_config_manager -from karaoke_downloader.downloader import KaraokeDownloader - - -def example_custom_data_directory(): - """Example of using the karaoke downloader with a custom data directory.""" - - print("🚀 Example: Custom Data Directory Integration") - print("=" * 50) - - # Create a temporary directory to simulate a different project structure - with tempfile.TemporaryDirectory() as temp_dir: - project_root = Path(temp_dir) / "my_karaoke_project" - project_root.mkdir(exist_ok=True) - - # Set up custom data directory structure - custom_data_dir = project_root / "karaoke_data" - custom_data_dir.mkdir(exist_ok=True) - - print(f"📁 Project root: {project_root}") - print(f"📁 Custom data directory: {custom_data_dir}") - - # Create a custom config file (in the custom data directory for this example) - config_file = custom_data_dir / "config.json" - config_data = { - "folder_structure": { - "data_dir": str(custom_data_dir), - "downloads_dir": str(project_root / "downloads"), - "logs_dir": str(project_root / "logs") - }, - "download_settings": { - "preferred_resolution": "720p" - } - } - - import json - with open(config_file, 'w') as f: - json.dump(config_data, f, indent=2) - - print(f"📄 Created config file: {config_file}") - - # Example 1: Using data path manager with custom directory - print("\n📋 Example 1: Data Path Manager") - data_path_manager = get_data_path_manager(str(custom_data_dir)) - - print(f" Data directory: {data_path_manager.data_dir}") - print(f" Songlist path: {data_path_manager.get_songlist_path()}") - print(f" Channels path: {data_path_manager.get_channels_json_path()}") - - # Example 2: Using config manager with custom directory - print("\n📋 Example 2: Config Manager") - config_manager = get_config_manager(str(custom_data_dir)) - config = config_manager.get_config() - - print(f" Config loaded from: {config_manager.config_file}") - print(f" Downloads directory: {config.folder_structure.downloads_dir}") - print(f" Logs directory: {config.folder_structure.logs_dir}") - print(f" Resolution: {config.download_settings.preferred_resolution}") - - # Example 3: Using downloader with custom directory - print("\n📋 Example 3: Karaoke Downloader") - try: - downloader = KaraokeDownloader() - print(f" Downloader initialized successfully") - print(f" Downloads directory: {downloader.downloads_dir}") - print(f" Logs directory: {downloader.logs_dir}") - except Exception as e: - print(f" Downloader initialization failed (expected): {e}") - - # Example 4: Creating sample data files - print("\n📋 Example 4: Sample Data Files") - - # Create a sample channels file - channels_file = data_path_manager.get_channels_json_path() - channels_data = { - "channels": [ - { - "name": "SingKingKaraoke", - "url": "https://www.youtube.com/@SingKingKaraoke/videos", - "parsing_rules": { - "format": "artist_title_separator", - "separator": " - ", - "artist_first": True - } - } - ] - } - - with open(channels_file, 'w') as f: - json.dump(channels_data, f, indent=2) - - print(f" Created channels file: {channels_file}") - - # Create a sample songlist file - songlist_file = data_path_manager.get_songlist_path() - songlist_data = [ - { - "title": "Sample Playlist", - "songs": [ - {"artist": "Artist 1", "title": "Song 1", "position": 1}, - {"artist": "Artist 2", "title": "Song 2", "position": 2} - ] - } - ] - - with open(songlist_file, 'w') as f: - json.dump(songlist_data, f, indent=2) - - print(f" Created songlist file: {songlist_file}") - - # List all files in the custom data directory - print(f"\n📋 Files in custom data directory:") - for file_path in custom_data_dir.iterdir(): - if file_path.is_file(): - print(f" - {file_path.name}") - - print(f"\n✅ Example completed successfully!") - print(f"📁 All data files are in: {custom_data_dir}") - - -def example_integration_pattern(): - """Example of integration pattern for other projects.""" - - print("\n🔧 Integration Pattern for Other Projects") - print("=" * 50) - - print(""" -# Integration Pattern: - -1. Set up your project structure: - my_project/ - ├── karaoke_data/ # Custom data directory - │ ├── config.json # Configuration - │ ├── channels.json # Channel definitions - │ ├── songList.json # Song lists - │ └── ... - ├── downloads/ # Downloaded videos - ├── logs/ # Log files - └── main.py # Your main application - -2. Initialize with custom data directory: - ```python - from karaoke_downloader.data_path_manager import get_data_path_manager - from karaoke_downloader.downloader import KaraokeDownloader - - # Set up custom data directory - custom_data_dir = "path/to/your/karaoke_data" - - # Get data path manager - data_path_manager = get_data_path_manager(custom_data_dir) - - # Initialize downloader (it will use the custom data directory) - downloader = KaraokeDownloader() - - # Use the downloader - downloader.download_songlist_across_channels( - channel_urls=["https://www.youtube.com/@SingKingKaraoke/videos"], - limit=5 - ) - ``` - -3. Configuration file (config.json in root, or karaoke_data/config.json for custom data directory): - ```json - { - "folder_structure": { - "data_dir": "path/to/your/karaoke_data", - "downloads_dir": "path/to/your/downloads", - "logs_dir": "path/to/your/logs" - }, - "download_settings": { - "preferred_resolution": "720p" - } - } - ``` -""") - - -def main(): - """Run the examples.""" - example_custom_data_directory() - example_integration_pattern() - - -if __name__ == "__main__": - main() diff --git a/karaoke_downloader/cli.py b/karaoke_downloader/cli.py index 1352040..852dc50 100644 --- a/karaoke_downloader/cli.py +++ b/karaoke_downloader/cli.py @@ -70,12 +70,10 @@ def load_channels_from_text(channels_file: str = None) -> List[str]: def load_channels(channel_file: str = None) -> List[str]: """Load channel URLs from file.""" if channel_file is None: - # Try JSON first, then fall back to text + # Use JSON configuration data_path_manager = get_data_path_manager() if data_path_manager.file_exists("channels.json"): return load_channels_from_json() - elif data_path_manager.file_exists("channels.txt"): - return load_channels_from_text() else: return [] else: diff --git a/karaoke_downloader/config_manager.py b/karaoke_downloader/config_manager.py index 23a6e20..1330ef6 100644 --- a/karaoke_downloader/config_manager.py +++ b/karaoke_downloader/config_manager.py @@ -167,7 +167,7 @@ class ConfigManager: Manages application configuration with loading, validation, and caching. """ - def __init__(self, config_file: Union[str, Path] = "config.json", data_dir: Optional[str] = None): + def __init__(self, config_file: Union[str, Path] = "config/config.json", data_dir: Optional[str] = None): """ Initialize the configuration manager. @@ -356,7 +356,7 @@ def get_config_manager(config_file: Optional[Union[str, Path]] = None, data_dir: global _config_manager if _config_manager is None or config_file is not None or data_dir is not None: if config_file is None: - config_file = "config.json" + config_file = "config/config.json" _config_manager = ConfigManager(config_file, data_dir) return _config_manager diff --git a/add_manual_video.py b/utilities/add_manual_video.py similarity index 100% rename from add_manual_video.py rename to utilities/add_manual_video.py diff --git a/build_cache_from_raw.py b/utilities/build_cache_from_raw.py similarity index 100% rename from build_cache_from_raw.py rename to utilities/build_cache_from_raw.py diff --git a/data/cleanup_duplicate_files.py b/utilities/cleanup_duplicate_files.py similarity index 100% rename from data/cleanup_duplicate_files.py rename to utilities/cleanup_duplicate_files.py diff --git a/data/cleanup_recent_tracking.py b/utilities/cleanup_recent_tracking.py similarity index 100% rename from data/cleanup_recent_tracking.py rename to utilities/cleanup_recent_tracking.py diff --git a/data/deduplicate_songlist_tracking.py b/utilities/deduplicate_songlist_tracking.py similarity index 100% rename from data/deduplicate_songlist_tracking.py rename to utilities/deduplicate_songlist_tracking.py diff --git a/fix_artist_name_format.py b/utilities/fix_artist_name_format.py similarity index 100% rename from fix_artist_name_format.py rename to utilities/fix_artist_name_format.py diff --git a/fix_artist_name_format_simple.py b/utilities/fix_artist_name_format_simple.py similarity index 100% rename from fix_artist_name_format_simple.py rename to utilities/fix_artist_name_format_simple.py diff --git a/fix_code_quality.py b/utilities/fix_code_quality.py similarity index 100% rename from fix_code_quality.py rename to utilities/fix_code_quality.py diff --git a/reset_and_redownload.py b/utilities/reset_and_redownload.py similarity index 100% rename from reset_and_redownload.py rename to utilities/reset_and_redownload.py diff --git a/data/songlist_report.py b/utilities/songlist_report.py similarity index 100% rename from data/songlist_report.py rename to utilities/songlist_report.py