Signed-off-by: Matt Bruce <mbrucedogs@gmail.com>

This commit is contained in:
Matt Bruce 2025-08-11 09:00:46 -05:00
parent 6a796d8571
commit e34c43a8f4
20 changed files with 38 additions and 310 deletions

20
PRD.md
View File

@ -152,7 +152,8 @@ KaroakeVideoDownloader/
│ ├── check_resolution.py # Resolution checker utility │ ├── check_resolution.py # Resolution checker utility
│ ├── resolution_cli.py # Resolution config CLI │ ├── resolution_cli.py # Resolution config CLI
│ └── tracking_cli.py # Tracking management CLI │ └── tracking_cli.py # Tracking management CLI
├── config.json # Main configuration file ├── config/ # Configuration files
│ └── config.json # Main configuration file
├── data/ # All tracking, cache, and songlist files ├── data/ # All tracking, cache, and songlist files
│ ├── karaoke_tracking.json │ ├── karaoke_tracking.json
│ ├── songlist_tracking.json │ ├── songlist_tracking.json
@ -161,6 +162,17 @@ KaroakeVideoDownloader/
│ ├── channels.txt # Legacy channel list (backward compatibility) │ ├── channels.txt # Legacy channel list (backward compatibility)
│ ├── manual_videos.json # Manual video collection │ ├── manual_videos.json # Manual video collection
│ └── songList.json │ └── songList.json
├── utilities/ # Utility scripts and tools
│ ├── add_manual_video.py # Manual video management
│ ├── build_cache_from_raw.py # Cache building utility
│ ├── cleanup_duplicate_files.py # File cleanup utilities
│ ├── cleanup_recent_tracking.py # Tracking cleanup utilities
│ ├── deduplicate_songlist_tracking.py # Data deduplication
│ ├── fix_artist_name_format.py # Data cleanup utilities
│ ├── fix_artist_name_format_simple.py
│ ├── fix_code_quality.py # Development tools
│ ├── reset_and_redownload.py # Maintenance utilities
│ └── songlist_report.py # Reporting utilities
├── downloads/ # All video output ├── downloads/ # All video output
│ └── [ChannelName]/ # Per-channel folders │ └── [ChannelName]/ # Per-channel folders
├── logs/ # Download logs ├── logs/ # Download logs
@ -566,7 +578,7 @@ python3 src/tests/test_macos.py
## 🔧 Recent Bug Fixes & Improvements (v3.4.7) ## 🔧 Recent Bug Fixes & Improvements (v3.4.7)
### **Configurable Data Directory Path** ### **Configurable Data Directory Path**
- **Centralized Data Path Management**: New `data_path_manager.py` module provides unified data directory path management - **Centralized Data Path Management**: New `data_path_manager.py` module provides unified data directory path management
- **Configurable Location**: Data directory path can be set in `config.json` under `folder_structure.data_dir` - **Configurable Location**: Data directory path can be set in `config/config.json` under `folder_structure.data_dir`
- **Backward Compatibility**: Defaults to "data" directory if not configured - **Backward Compatibility**: Defaults to "data" directory if not configured
- **Cross-Project Integration**: Enables the karaoke downloader to be used as a component in other projects with different data directory structures - **Cross-Project Integration**: Enables the karaoke downloader to be used as a component in other projects with different data directory structures
- **Updated All Modules**: All modules now use the data path manager instead of hardcoded "data/" paths - **Updated All Modules**: All modules now use the data path manager instead of hardcoded "data/" paths
@ -585,8 +597,8 @@ The original implementation had a circular dependency problem:
- **Problem**: `config.json` was located in the `data/` directory - **Problem**: `config.json` was located in the `data/` directory
- **Issue**: To read the config file, we needed to know where the data directory is - **Issue**: To read the config file, we needed to know where the data directory is
- **Conflict**: But the data directory location is specified in the config file - **Conflict**: But the data directory location is specified in the config file
- **Solution**: Moved `config.json` to the root directory as a fixed location - **Solution**: Moved `config.json` to the `config/` directory as a fixed location
- **Result**: Config file is always accessible, and data directory can be configured within it - **Result**: Config file is always accessible in a dedicated config directory, and data directory can be configured within it
- **Backward Compatibility**: System still works with config files in custom data directories when explicitly specified - **Backward Compatibility**: System still works with config files in custom data directories when explicitly specified
## 🔧 Recent Bug Fixes & Improvements (v3.4.6) ## 🔧 Recent Bug Fixes & Improvements (v3.4.6)

View File

@ -31,7 +31,7 @@ The codebase has been comprehensively refactored into a modular architecture wit
### **Configurable Data Directory (v3.4.7)** ### **Configurable Data Directory (v3.4.7)**
- **Centralized Data Path Management**: `data_path_manager.py` provides unified data directory path management - **Centralized Data Path Management**: `data_path_manager.py` provides unified data directory path management
- **Configurable Location**: Data directory path can be set in `config.json` under `folder_structure.data_dir` - **Configurable Location**: Data directory path can be set in `config/config.json` under `folder_structure.data_dir`
- **Backward Compatibility**: Defaults to "data" directory if not configured - **Backward Compatibility**: Defaults to "data" directory if not configured
- **Cross-Project Integration**: Enables the karaoke downloader to be used as a component in other projects with different data directory structures - **Cross-Project Integration**: Enables the karaoke downloader to be used as a component in other projects with different data directory structures
@ -410,13 +410,25 @@ KaroakeVideoDownloader/
│ ├── check_resolution.py # Resolution checker utility │ ├── check_resolution.py # Resolution checker utility
│ ├── resolution_cli.py # Resolution config CLI │ ├── resolution_cli.py # Resolution config CLI
│ └── tracking_cli.py # Tracking management CLI │ └── tracking_cli.py # Tracking management CLI
├── config.json # Main configuration file ├── config/ # Configuration files
│ └── config.json # Main configuration file
├── data/ # All tracking, cache, and songlist files ├── data/ # All tracking, cache, and songlist files
│ ├── karaoke_tracking.json │ ├── karaoke_tracking.json
│ ├── songlist_tracking.json │ ├── songlist_tracking.json
│ ├── channel_cache.json │ ├── channel_cache.json
│ ├── channels.txt │ ├── channels.txt
│ └── songList.json │ └── songList.json
├── utilities/ # Utility scripts and tools
│ ├── add_manual_video.py # Manual video management
│ ├── build_cache_from_raw.py # Cache building utility
│ ├── cleanup_duplicate_files.py # File cleanup utilities
│ ├── cleanup_recent_tracking.py # Tracking cleanup utilities
│ ├── deduplicate_songlist_tracking.py # Data deduplication
│ ├── fix_artist_name_format.py # Data cleanup utilities
│ ├── fix_artist_name_format_simple.py
│ ├── fix_code_quality.py # Development tools
│ ├── reset_and_redownload.py # Maintenance utilities
│ └── songlist_report.py # Reporting utilities
├── downloads/ # All video output ├── downloads/ # All video output
│ └── [ChannelName]/ # Per-channel folders │ └── [ChannelName]/ # Per-channel folders
├── logs/ # Download logs ├── logs/ # Download logs
@ -523,9 +535,9 @@ python download_karaoke.py --generate-unmatched-report --fuzzy-match --fuzzy-thr
- Removes `.info.json` and `.meta` files after download - Removes `.info.json` and `.meta` files after download
## 🛠️ Configuration ## 🛠️ Configuration
- All options are in `config.json` (format, resolution, metadata, etc.) - All options are in `config/config.json` (format, resolution, metadata, etc.)
- You can edit this file or use CLI flags to override - You can edit this file or use CLI flags to override
- **Configurable Data Directory**: The data directory path can be configured in `config.json` under `folder_structure.data_dir` (default: "data") - **Configurable Data Directory**: The data directory path can be configured in `config/config.json` under `folder_structure.data_dir` (default: "data")
## 📋 Command Reference File ## 📋 Command Reference File

View File

@ -37,13 +37,13 @@ python download_karaoke.py --manual --songlist-only --limit 10
python download_karaoke.py --manual --force --limit 5 python download_karaoke.py --manual --force --limit 5
# Add a video to manual collection (interactive) # Add a video to manual collection (interactive)
python add_manual_video.py add "Artist - Song Title (Karaoke Version)" "https://www.youtube.com/watch?v=VIDEO_ID" python utilities/add_manual_video.py add "Artist - Song Title (Karaoke Version)" "https://www.youtube.com/watch?v=VIDEO_ID"
# List all manual videos # List all manual videos
python add_manual_video.py list python utilities/add_manual_video.py list
# Remove a video from manual collection # Remove a video from manual collection
python add_manual_video.py remove "Artist - Song Title (Karaoke Version)" python utilities/add_manual_video.py remove "Artist - Song Title (Karaoke Version)"
## 🎬 ALL VIDEOS DOWNLOAD MODE (v3.4.4) ## 🎬 ALL VIDEOS DOWNLOAD MODE (v3.4.4)

View File

@ -1,6 +0,0 @@
https://www.youtube.com/@SingKingKaraoke/videos
https://www.youtube.com/@KaraokeOnVEVO/videos
https://www.youtube.com/@StingrayKaraoke/videos
https://www.youtube.com/@sing2karaoke/videos
https://www.youtube.com/@ZoomKaraokeOfficial/videos
https://www.youtube.com/@VocalStarKaraoke/videos

View File

@ -1,78 +0,0 @@
{
"timestamp": "2025-08-05T16:01:09.018725",
"download_plan": [
{
"video_id": "oHV8Iw0R4BY",
"artist": "Shaboozey, Jelly Roll",
"title": "Amen",
"filename": "Shaboozey, Jelly Roll - Amen.mp4",
"channel_name": "@SingKingKaraoke",
"video_title": "Shaboozey, Jelly Roll - Amen (Karaoke Version)",
"force_download": false
},
{
"video_id": "Jm3a-VAomH0",
"artist": "Pet Shop Boys",
"title": "Domino Dancing",
"filename": "Pet Shop Boys - Domino Dancing.mp4",
"channel_name": "@KaraokeOnVEVO",
"video_title": "Pet Shop Boys - Domino Dancing (Karaoke)",
"force_download": false
},
{
"video_id": "6Vb0igX0-Ss",
"artist": "Chappell Roan",
"title": "The Giver",
"filename": "Chappell Roan - The Giver.mp4",
"channel_name": "@StingrayKaraoke",
"video_title": "Chappell Roan - The Giver (Karaoke Version)",
"force_download": false
},
{
"video_id": "b1k2_B9oCr4",
"artist": "James Arthur",
"title": "Train Wreck",
"filename": "James Arthur - Train Wreck.mp4",
"channel_name": "@sing2karaoke",
"video_title": "James Arthur Train Wreck",
"force_download": false
},
{
"video_id": "cg10FeEYSSQ",
"artist": "Caesars",
"title": "Jerk It Out",
"filename": "Caesars - Jerk It Out.mp4",
"channel_name": "@ZoomKaraokeOfficial",
"video_title": "Caesars - Jerk It Out - Karaoke Version from Zoom Karaoke",
"force_download": false
},
{
"video_id": "m51bbu2ghp4",
"artist": "Jin",
"title": "Don't Say You Love Me",
"filename": "Jin - Dont Say You Love Me.mp4",
"channel_name": "@VocalStarKaraoke",
"video_title": "Don't Say You Love Me - Jin KARAOKE With Vocal Guide",
"force_download": false
},
{
"video_id": "qegLWI99Wg0",
"artist": "Ed Sheeran & Beyoncé",
"title": "Perfect Duet",
"filename": "Ed Sheeran & Beyoncé - Perfect Duet.mp4",
"channel_name": "Unknown",
"video_title": "Ed Sheeran & Beyoncé - Perfect Duet",
"force_download": false
},
{
"video_id": "ZbWHuncTgsM",
"artist": "Sia",
"title": "Snowman | Karaoke (instrumental)",
"filename": "Sia - Snowman Karaoke (instrumental).mp4",
"channel_name": "@LetsSingKaraoke",
"video_title": "Sia - Snowman | Karaoke (instrumental)",
"force_download": false
}
],
"unmatched": []
}

View File

@ -1,12 +0,0 @@
{
"generated_at": "2025-08-05T16:07:48.031279",
"total_unmatched": 1,
"unmatched_songs": [
{
"artist": "SZA",
"title": "30 For 30",
"position": 3,
"search_key": "sza_30 for 30"
}
]
}

View File

@ -1,198 +0,0 @@
#!/usr/bin/env python3
"""
Example: Using Karaoke Downloader with Custom Data Directory
This example demonstrates how to integrate the karaoke downloader into another project
with a different data directory structure.
"""
import os
import tempfile
from pathlib import Path
from karaoke_downloader.data_path_manager import get_data_path_manager
from karaoke_downloader.config_manager import get_config_manager
from karaoke_downloader.downloader import KaraokeDownloader
def _write_json(path, payload):
    """Serialize *payload* to *path* as 2-space-indented JSON, encoded as UTF-8."""
    # Function-scope import: the module's top-level imports do not include json.
    import json

    with open(path, "w", encoding="utf-8") as handle:
        json.dump(payload, handle, indent=2)


def example_custom_data_directory() -> None:
    """Demonstrate the karaoke downloader with a custom data directory.

    Builds a throwaway project layout inside a temporary directory, writes a
    config file into the custom data directory, and then walks through four
    steps, printing progress for each:

    1. Obtain a data path manager for the custom directory.
    2. Load the configuration that was just written.
    3. Try to construct a ``KaraokeDownloader`` (failure is reported, not raised).
    4. Write sample ``channels`` and ``songlist`` JSON files and list the
       files found in the custom data directory.

    Everything is created under ``tempfile.TemporaryDirectory`` and is removed
    when the function returns.
    """
    print("🚀 Example: Custom Data Directory Integration")
    print("=" * 50)

    # Create a temporary directory to simulate a different project structure
    with tempfile.TemporaryDirectory() as temp_dir:
        project_root = Path(temp_dir) / "my_karaoke_project"
        project_root.mkdir(exist_ok=True)

        # Set up custom data directory structure
        custom_data_dir = project_root / "karaoke_data"
        custom_data_dir.mkdir(exist_ok=True)

        print(f"📁 Project root: {project_root}")
        print(f"📁 Custom data directory: {custom_data_dir}")

        # Create a custom config file (in the custom data directory for this example)
        config_file = custom_data_dir / "config.json"
        config_data = {
            "folder_structure": {
                "data_dir": str(custom_data_dir),
                "downloads_dir": str(project_root / "downloads"),
                "logs_dir": str(project_root / "logs"),
            },
            "download_settings": {
                "preferred_resolution": "720p",
            },
        }
        _write_json(config_file, config_data)
        print(f"📄 Created config file: {config_file}")

        # Example 1: Using data path manager with custom directory
        print("\n📋 Example 1: Data Path Manager")
        data_path_manager = get_data_path_manager(str(custom_data_dir))
        print(f" Data directory: {data_path_manager.data_dir}")
        print(f" Songlist path: {data_path_manager.get_songlist_path()}")
        print(f" Channels path: {data_path_manager.get_channels_json_path()}")

        # Example 2: Using config manager with custom directory
        print("\n📋 Example 2: Config Manager")
        # The first parameter of get_config_manager is ``config_file``, so the
        # config file path is passed by keyword. Passing the data directory
        # positionally (as before) handed a directory where a file path is
        # expected.
        # NOTE(review): if ConfigManager is meant to locate config.json from
        # ``data_dir`` instead, switch to data_dir=str(custom_data_dir) — confirm.
        config_manager = get_config_manager(config_file=config_file)
        config = config_manager.get_config()
        print(f" Config loaded from: {config_manager.config_file}")
        print(f" Downloads directory: {config.folder_structure.downloads_dir}")
        print(f" Logs directory: {config.folder_structure.logs_dir}")
        print(f" Resolution: {config.download_settings.preferred_resolution}")

        # Example 3: Using downloader with custom directory
        print("\n📋 Example 3: Karaoke Downloader")
        try:
            downloader = KaraokeDownloader()
            print(" Downloader initialized successfully")
            print(f" Downloads directory: {downloader.downloads_dir}")
            print(f" Logs directory: {downloader.logs_dir}")
        except Exception as e:
            # Deliberately broad: this is a demo, and a failure here is
            # reported as expected rather than aborting the remaining steps.
            print(f" Downloader initialization failed (expected): {e}")

        # Example 4: Creating sample data files
        print("\n📋 Example 4: Sample Data Files")

        # Create a sample channels file
        channels_file = data_path_manager.get_channels_json_path()
        channels_data = {
            "channels": [
                {
                    "name": "SingKingKaraoke",
                    "url": "https://www.youtube.com/@SingKingKaraoke/videos",
                    "parsing_rules": {
                        "format": "artist_title_separator",
                        "separator": " - ",
                        "artist_first": True,
                    },
                }
            ]
        }
        _write_json(channels_file, channels_data)
        print(f" Created channels file: {channels_file}")

        # Create a sample songlist file
        songlist_file = data_path_manager.get_songlist_path()
        songlist_data = [
            {
                "title": "Sample Playlist",
                "songs": [
                    {"artist": "Artist 1", "title": "Song 1", "position": 1},
                    {"artist": "Artist 2", "title": "Song 2", "position": 2},
                ],
            }
        ]
        _write_json(songlist_file, songlist_data)
        print(f" Created songlist file: {songlist_file}")

        # List all files in the custom data directory
        print("\n📋 Files in custom data directory:")
        for file_path in custom_data_dir.iterdir():
            if file_path.is_file():
                print(f" - {file_path.name}")

        print("\n✅ Example completed successfully!")
        print(f"📁 All data files are in: {custom_data_dir}")
def example_integration_pattern():
    """Print a step-by-step guide for embedding the downloader in another project.

    Purely informational: writes a heading, a 50-character rule, and a fixed
    block of guidance text (project layout, initialization snippet, and a
    sample configuration) to standard output. Nothing is returned.
    """
    heading = "\n🔧 Integration Pattern for Other Projects"
    rule = "=" * 50
    # The guide is emitted verbatim; it embeds fenced Python and JSON snippets.
    guide = """
# Integration Pattern:
1. Set up your project structure:
my_project/
karaoke_data/ # Custom data directory
config.json # Configuration
channels.json # Channel definitions
songList.json # Song lists
...
downloads/ # Downloaded videos
logs/ # Log files
main.py # Your main application
2. Initialize with custom data directory:
```python
from karaoke_downloader.data_path_manager import get_data_path_manager
from karaoke_downloader.downloader import KaraokeDownloader
# Set up custom data directory
custom_data_dir = "path/to/your/karaoke_data"
# Get data path manager
data_path_manager = get_data_path_manager(custom_data_dir)
# Initialize downloader (it will use the custom data directory)
downloader = KaraokeDownloader()
# Use the downloader
downloader.download_songlist_across_channels(
channel_urls=["https://www.youtube.com/@SingKingKaraoke/videos"],
limit=5
)
```
3. Configuration file (config.json in root, or karaoke_data/config.json for custom data directory):
```json
{
"folder_structure": {
"data_dir": "path/to/your/karaoke_data",
"downloads_dir": "path/to/your/downloads",
"logs_dir": "path/to/your/logs"
},
"download_settings": {
"preferred_resolution": "720p"
}
}
```
"""
    for chunk in (heading, rule, guide):
        print(chunk)
def main():
    """Run both examples in order: the custom-data-directory demo, then the integration guide."""
    examples = (example_custom_data_directory, example_integration_pattern)
    for run_example in examples:
        run_example()


# Only run the examples when executed as a script, not when imported.
if __name__ == "__main__":
    main()

View File

@ -70,12 +70,10 @@ def load_channels_from_text(channels_file: str = None) -> List[str]:
def load_channels(channel_file: str = None) -> List[str]: def load_channels(channel_file: str = None) -> List[str]:
"""Load channel URLs from file.""" """Load channel URLs from file."""
if channel_file is None: if channel_file is None:
# Try JSON first, then fall back to text # Use JSON configuration
data_path_manager = get_data_path_manager() data_path_manager = get_data_path_manager()
if data_path_manager.file_exists("channels.json"): if data_path_manager.file_exists("channels.json"):
return load_channels_from_json() return load_channels_from_json()
elif data_path_manager.file_exists("channels.txt"):
return load_channels_from_text()
else: else:
return [] return []
else: else:

View File

@ -167,7 +167,7 @@ class ConfigManager:
Manages application configuration with loading, validation, and caching. Manages application configuration with loading, validation, and caching.
""" """
def __init__(self, config_file: Union[str, Path] = "config.json", data_dir: Optional[str] = None): def __init__(self, config_file: Union[str, Path] = "config/config.json", data_dir: Optional[str] = None):
""" """
Initialize the configuration manager. Initialize the configuration manager.
@ -356,7 +356,7 @@ def get_config_manager(config_file: Optional[Union[str, Path]] = None, data_dir:
global _config_manager global _config_manager
if _config_manager is None or config_file is not None or data_dir is not None: if _config_manager is None or config_file is not None or data_dir is not None:
if config_file is None: if config_file is None:
config_file = "config.json" config_file = "config/config.json"
_config_manager = ConfigManager(config_file, data_dir) _config_manager = ConfigManager(config_file, data_dir)
return _config_manager return _config_manager