Compare commits
2 Commits
b0eb76930a
...
e34c43a8f4
| Author | SHA1 | Date | |
|---|---|---|---|
| e34c43a8f4 | |||
| 6a796d8571 |
42
PRD.md
42
PRD.md
@ -152,8 +152,9 @@ KaroakeVideoDownloader/
|
|||||||
│ ├── check_resolution.py # Resolution checker utility
|
│ ├── check_resolution.py # Resolution checker utility
|
||||||
│ ├── resolution_cli.py # Resolution config CLI
|
│ ├── resolution_cli.py # Resolution config CLI
|
||||||
│ └── tracking_cli.py # Tracking management CLI
|
│ └── tracking_cli.py # Tracking management CLI
|
||||||
├── data/ # All config, tracking, cache, and songlist files
|
├── config/ # Configuration files
|
||||||
│ ├── config.json
|
│ └── config.json # Main configuration file
|
||||||
|
├── data/ # All tracking, cache, and songlist files
|
||||||
│ ├── karaoke_tracking.json
|
│ ├── karaoke_tracking.json
|
||||||
│ ├── songlist_tracking.json
|
│ ├── songlist_tracking.json
|
||||||
│ ├── channel_cache.json
|
│ ├── channel_cache.json
|
||||||
@ -161,6 +162,17 @@ KaroakeVideoDownloader/
|
|||||||
│ ├── channels.txt # Legacy channel list (backward compatibility)
|
│ ├── channels.txt # Legacy channel list (backward compatibility)
|
||||||
│ ├── manual_videos.json # Manual video collection
|
│ ├── manual_videos.json # Manual video collection
|
||||||
│ └── songList.json
|
│ └── songList.json
|
||||||
|
├── utilities/ # Utility scripts and tools
|
||||||
|
│ ├── add_manual_video.py # Manual video management
|
||||||
|
│ ├── build_cache_from_raw.py # Cache building utility
|
||||||
|
│ ├── cleanup_duplicate_files.py # File cleanup utilities
|
||||||
|
│ ├── cleanup_recent_tracking.py # Tracking cleanup utilities
|
||||||
|
│ ├── deduplicate_songlist_tracking.py # Data deduplication
|
||||||
|
│ ├── fix_artist_name_format.py # Data cleanup utilities
|
||||||
|
│ ├── fix_artist_name_format_simple.py
|
||||||
|
│ ├── fix_code_quality.py # Development tools
|
||||||
|
│ ├── reset_and_redownload.py # Maintenance utilities
|
||||||
|
│ └── songlist_report.py # Reporting utilities
|
||||||
├── downloads/ # All video output
|
├── downloads/ # All video output
|
||||||
│ └── [ChannelName]/ # Per-channel folders
|
│ └── [ChannelName]/ # Per-channel folders
|
||||||
├── logs/ # Download logs
|
├── logs/ # Download logs
|
||||||
@ -563,6 +575,32 @@ python3 src/tests/test_macos.py
|
|||||||
# 3. Install FFmpeg: brew install ffmpeg
|
# 3. Install FFmpeg: brew install ffmpeg
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## 🔧 Recent Bug Fixes & Improvements (v3.4.7)
|
||||||
|
### **Configurable Data Directory Path**
|
||||||
|
- **Centralized Data Path Management**: New `data_path_manager.py` module provides unified data directory path management
|
||||||
|
- **Configurable Location**: Data directory path can be set in `config/config.json` under `folder_structure.data_dir`
|
||||||
|
- **Backward Compatibility**: Defaults to "data" directory if not configured
|
||||||
|
- **Cross-Project Integration**: Enables the karaoke downloader to be used as a component in other projects with different data directory structures
|
||||||
|
- **Updated All Modules**: All modules now use the data path manager instead of hardcoded "data/" paths
|
||||||
|
- **Utility Functions**: Provides `get_data_path()`, `get_data_dir()`, and `get_data_path_manager()` functions for easy access
|
||||||
|
- **Fixed Circular Dependency**: Moved `config.json` from `data/` to the dedicated `config/` directory to resolve the chicken-and-egg problem
|
||||||
|
|
||||||
|
### **Benefits of Configurable Data Directory**
|
||||||
|
- **Flexible Deployment**: Can be integrated into other projects with different directory structures
|
||||||
|
- **Centralized Configuration**: Single point of configuration for all data file paths
|
||||||
|
- **Maintainable Code**: Eliminates hardcoded paths throughout the codebase
|
||||||
|
- **Easy Testing**: Can use temporary directories for testing without affecting production data
|
||||||
|
- **Future-Proof**: Makes it easier to change data directory structure in the future
|
||||||
|
|
||||||
|
### **Circular Dependency Solution**
|
||||||
|
The original implementation had a circular dependency problem:
|
||||||
|
- **Problem**: `config.json` was located in the `data/` directory
|
||||||
|
- **Issue**: To read the config file, we needed to know where the data directory is
|
||||||
|
- **Conflict**: But the data directory location is specified in the config file
|
||||||
|
- **Solution**: Moved `config.json` to the `config/` directory as a fixed location
|
||||||
|
- **Result**: Config file is always accessible in a dedicated config directory, and data directory can be configured within it
|
||||||
|
- **Backward Compatibility**: System still works with config files in custom data directories when explicitly specified
|
||||||
|
|
||||||
## 🔧 Recent Bug Fixes & Improvements (v3.4.6)
|
## 🔧 Recent Bug Fixes & Improvements (v3.4.6)
|
||||||
### **Dry Run Mode**
|
### **Dry Run Mode**
|
||||||
- **New `--dry-run` parameter**: Build download plan and show what would be downloaded without actually downloading anything
|
- **New `--dry-run` parameter**: Build download plan and show what would be downloaded without actually downloading anything
|
||||||
|
|||||||
28
README.md
28
README.md
@ -29,6 +29,12 @@ A Python-based cross-platform CLI tool to download karaoke videos from YouTube c
|
|||||||
## 🏗️ Architecture
|
## 🏗️ Architecture
|
||||||
The codebase has been comprehensively refactored into a modular architecture with centralized utilities for improved maintainability, error handling, and code reuse:
|
The codebase has been comprehensively refactored into a modular architecture with centralized utilities for improved maintainability, error handling, and code reuse:
|
||||||
|
|
||||||
|
### **Configurable Data Directory (v3.4.7)**
|
||||||
|
- **Centralized Data Path Management**: `data_path_manager.py` provides unified data directory path management
|
||||||
|
- **Configurable Location**: Data directory path can be set in `config/config.json` under `folder_structure.data_dir`
|
||||||
|
- **Backward Compatibility**: Defaults to "data" directory if not configured
|
||||||
|
- **Cross-Project Integration**: Enables the karaoke downloader to be used as a component in other projects with different data directory structures
|
||||||
|
|
||||||
### Core Modules:
|
### Core Modules:
|
||||||
- **`downloader.py`**: Main orchestrator and CLI interface
|
- **`downloader.py`**: Main orchestrator and CLI interface
|
||||||
- **`video_downloader.py`**: Core video download execution and orchestration
|
- **`video_downloader.py`**: Core video download execution and orchestration
|
||||||
@ -53,6 +59,9 @@ The codebase has been comprehensively refactored into a modular architecture wit
|
|||||||
- **`file_utils.py`**: Centralized file operations, filename sanitization, and file validation
|
- **`file_utils.py`**: Centralized file operations, filename sanitization, and file validation
|
||||||
- **`song_validator.py`**: Centralized song validation logic for checking if songs should be downloaded
|
- **`song_validator.py`**: Centralized song validation logic for checking if songs should be downloaded
|
||||||
|
|
||||||
|
### New Utility Modules (v3.4.7):
|
||||||
|
- **`data_path_manager.py`**: Centralized data directory path management and file path resolution
|
||||||
|
|
||||||
### **Unified Download Workflow (v3.4.5)**
|
### **Unified Download Workflow (v3.4.5)**
|
||||||
- **`execute_unified_download_workflow()`**: Centralized download execution that all modes use
|
- **`execute_unified_download_workflow()`**: Centralized download execution that all modes use
|
||||||
- **`_execute_sequential_downloads()`**: Sequential download execution using DownloadPipeline
|
- **`_execute_sequential_downloads()`**: Sequential download execution using DownloadPipeline
|
||||||
@ -401,13 +410,25 @@ KaroakeVideoDownloader/
|
|||||||
│ ├── check_resolution.py # Resolution checker utility
|
│ ├── check_resolution.py # Resolution checker utility
|
||||||
│ ├── resolution_cli.py # Resolution config CLI
|
│ ├── resolution_cli.py # Resolution config CLI
|
||||||
│ └── tracking_cli.py # Tracking management CLI
|
│ └── tracking_cli.py # Tracking management CLI
|
||||||
├── data/ # All config, tracking, cache, and songlist files
|
├── config/ # Configuration files
|
||||||
│ ├── config.json
|
│ └── config.json # Main configuration file
|
||||||
|
├── data/ # All tracking, cache, and songlist files
|
||||||
│ ├── karaoke_tracking.json
|
│ ├── karaoke_tracking.json
|
||||||
│ ├── songlist_tracking.json
|
│ ├── songlist_tracking.json
|
||||||
│ ├── channel_cache.json
|
│ ├── channel_cache.json
|
||||||
│ ├── channels.txt
|
│ ├── channels.txt
|
||||||
│ └── songList.json
|
│ └── songList.json
|
||||||
|
├── utilities/ # Utility scripts and tools
|
||||||
|
│ ├── add_manual_video.py # Manual video management
|
||||||
|
│ ├── build_cache_from_raw.py # Cache building utility
|
||||||
|
│ ├── cleanup_duplicate_files.py # File cleanup utilities
|
||||||
|
│ ├── cleanup_recent_tracking.py # Tracking cleanup utilities
|
||||||
|
│ ├── deduplicate_songlist_tracking.py # Data deduplication
|
||||||
|
│ ├── fix_artist_name_format.py # Data cleanup utilities
|
||||||
|
│ ├── fix_artist_name_format_simple.py
|
||||||
|
│ ├── fix_code_quality.py # Development tools
|
||||||
|
│ ├── reset_and_redownload.py # Maintenance utilities
|
||||||
|
│ └── songlist_report.py # Reporting utilities
|
||||||
├── downloads/ # All video output
|
├── downloads/ # All video output
|
||||||
│ └── [ChannelName]/ # Per-channel folders
|
│ └── [ChannelName]/ # Per-channel folders
|
||||||
├── logs/ # Download logs
|
├── logs/ # Download logs
|
||||||
@ -514,8 +535,9 @@ python download_karaoke.py --generate-unmatched-report --fuzzy-match --fuzzy-thr
|
|||||||
- Removes `.info.json` and `.meta` files after download
|
- Removes `.info.json` and `.meta` files after download
|
||||||
|
|
||||||
## 🛠️ Configuration
|
## 🛠️ Configuration
|
||||||
- All options are in `data/config.json` (format, resolution, metadata, etc.)
|
- All options are in `config/config.json` (format, resolution, metadata, etc.)
|
||||||
- You can edit this file or use CLI flags to override
|
- You can edit this file or use CLI flags to override
|
||||||
|
- **Configurable Data Directory**: The data directory path can be configured in `config/config.json` under `folder_structure.data_dir` (default: "data")
|
||||||
|
|
||||||
## 📋 Command Reference File
|
## 📋 Command Reference File
|
||||||
|
|
||||||
|
|||||||
@ -37,13 +37,13 @@ python download_karaoke.py --manual --songlist-only --limit 10
|
|||||||
python download_karaoke.py --manual --force --limit 5
|
python download_karaoke.py --manual --force --limit 5
|
||||||
|
|
||||||
# Add a video to manual collection (interactive)
|
# Add a video to manual collection (interactive)
|
||||||
python add_manual_video.py add "Artist - Song Title (Karaoke Version)" "https://www.youtube.com/watch?v=VIDEO_ID"
|
python utilities/add_manual_video.py add "Artist - Song Title (Karaoke Version)" "https://www.youtube.com/watch?v=VIDEO_ID"
|
||||||
|
|
||||||
# List all manual videos
|
# List all manual videos
|
||||||
python add_manual_video.py list
|
python utilities/add_manual_video.py list
|
||||||
|
|
||||||
# Remove a video from manual collection
|
# Remove a video from manual collection
|
||||||
python add_manual_video.py remove "Artist - Song Title (Karaoke Version)"
|
python utilities/add_manual_video.py remove "Artist - Song Title (Karaoke Version)"
|
||||||
|
|
||||||
## 🎬 ALL VIDEOS DOWNLOAD MODE (v3.4.4)
|
## 🎬 ALL VIDEOS DOWNLOAD MODE (v3.4.4)
|
||||||
|
|
||||||
|
|||||||
@ -26,6 +26,7 @@
|
|||||||
"folder_structure": {
|
"folder_structure": {
|
||||||
"downloads_dir": "downloads",
|
"downloads_dir": "downloads",
|
||||||
"logs_dir": "logs",
|
"logs_dir": "logs",
|
||||||
|
"data_dir": "data",
|
||||||
"tracking_file": "downloaded_videos.json"
|
"tracking_file": "downloaded_videos.json"
|
||||||
},
|
},
|
||||||
"logging": {
|
"logging": {
|
||||||
@ -1,6 +0,0 @@
|
|||||||
https://www.youtube.com/@SingKingKaraoke/videos
|
|
||||||
https://www.youtube.com/@KaraokeOnVEVO/videos
|
|
||||||
https://www.youtube.com/@StingrayKaraoke/videos
|
|
||||||
https://www.youtube.com/@sing2karaoke/videos
|
|
||||||
https://www.youtube.com/@ZoomKaraokeOfficial/videos
|
|
||||||
https://www.youtube.com/@VocalStarKaraoke/videos
|
|
||||||
@ -1,78 +0,0 @@
|
|||||||
{
|
|
||||||
"timestamp": "2025-08-05T16:01:09.018725",
|
|
||||||
"download_plan": [
|
|
||||||
{
|
|
||||||
"video_id": "oHV8Iw0R4BY",
|
|
||||||
"artist": "Shaboozey, Jelly Roll",
|
|
||||||
"title": "Amen",
|
|
||||||
"filename": "Shaboozey, Jelly Roll - Amen.mp4",
|
|
||||||
"channel_name": "@SingKingKaraoke",
|
|
||||||
"video_title": "Shaboozey, Jelly Roll - Amen (Karaoke Version)",
|
|
||||||
"force_download": false
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"video_id": "Jm3a-VAomH0",
|
|
||||||
"artist": "Pet Shop Boys",
|
|
||||||
"title": "Domino Dancing",
|
|
||||||
"filename": "Pet Shop Boys - Domino Dancing.mp4",
|
|
||||||
"channel_name": "@KaraokeOnVEVO",
|
|
||||||
"video_title": "Pet Shop Boys - Domino Dancing (Karaoke)",
|
|
||||||
"force_download": false
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"video_id": "6Vb0igX0-Ss",
|
|
||||||
"artist": "Chappell Roan",
|
|
||||||
"title": "The Giver",
|
|
||||||
"filename": "Chappell Roan - The Giver.mp4",
|
|
||||||
"channel_name": "@StingrayKaraoke",
|
|
||||||
"video_title": "Chappell Roan - The Giver (Karaoke Version)",
|
|
||||||
"force_download": false
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"video_id": "b1k2_B9oCr4",
|
|
||||||
"artist": "James Arthur",
|
|
||||||
"title": "Train Wreck",
|
|
||||||
"filename": "James Arthur - Train Wreck.mp4",
|
|
||||||
"channel_name": "@sing2karaoke",
|
|
||||||
"video_title": "James Arthur Train Wreck",
|
|
||||||
"force_download": false
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"video_id": "cg10FeEYSSQ",
|
|
||||||
"artist": "Caesars",
|
|
||||||
"title": "Jerk It Out",
|
|
||||||
"filename": "Caesars - Jerk It Out.mp4",
|
|
||||||
"channel_name": "@ZoomKaraokeOfficial",
|
|
||||||
"video_title": "Caesars - Jerk It Out - Karaoke Version from Zoom Karaoke",
|
|
||||||
"force_download": false
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"video_id": "m51bbu2ghp4",
|
|
||||||
"artist": "Jin",
|
|
||||||
"title": "Don't Say You Love Me",
|
|
||||||
"filename": "Jin - Dont Say You Love Me.mp4",
|
|
||||||
"channel_name": "@VocalStarKaraoke",
|
|
||||||
"video_title": "Don't Say You Love Me - Jin KARAOKE With Vocal Guide",
|
|
||||||
"force_download": false
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"video_id": "qegLWI99Wg0",
|
|
||||||
"artist": "Ed Sheeran & Beyoncé",
|
|
||||||
"title": "Perfect Duet",
|
|
||||||
"filename": "Ed Sheeran & Beyoncé - Perfect Duet.mp4",
|
|
||||||
"channel_name": "Unknown",
|
|
||||||
"video_title": "Ed Sheeran & Beyoncé - Perfect Duet",
|
|
||||||
"force_download": false
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"video_id": "ZbWHuncTgsM",
|
|
||||||
"artist": "Sia",
|
|
||||||
"title": "Snowman | Karaoke (instrumental)",
|
|
||||||
"filename": "Sia - Snowman Karaoke (instrumental).mp4",
|
|
||||||
"channel_name": "@LetsSingKaraoke",
|
|
||||||
"video_title": "Sia - Snowman | Karaoke (instrumental)",
|
|
||||||
"force_download": false
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"unmatched": []
|
|
||||||
}
|
|
||||||
@ -1,12 +0,0 @@
|
|||||||
{
|
|
||||||
"generated_at": "2025-08-05T16:07:48.031279",
|
|
||||||
"total_unmatched": 1,
|
|
||||||
"unmatched_songs": [
|
|
||||||
{
|
|
||||||
"artist": "SZA",
|
|
||||||
"title": "30 For 30",
|
|
||||||
"position": 3,
|
|
||||||
"search_key": "sza_30 for 30"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
@ -9,6 +9,8 @@ import json
|
|||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from karaoke_downloader.data_path_manager import get_data_path_manager
|
||||||
|
|
||||||
# Constants
|
# Constants
|
||||||
DEFAULT_CACHE_EXPIRATION_DAYS = 1
|
DEFAULT_CACHE_EXPIRATION_DAYS = 1
|
||||||
DEFAULT_CACHE_FILENAME_LENGTH_LIMIT = 200 # Increased from 60
|
DEFAULT_CACHE_FILENAME_LENGTH_LIMIT = 200 # Increased from 60
|
||||||
@ -37,7 +39,7 @@ def get_download_plan_cache_file(mode, **kwargs):
|
|||||||
+ hashlib.md5(base.encode()).hexdigest()[:8]
|
+ hashlib.md5(base.encode()).hexdigest()[:8]
|
||||||
)
|
)
|
||||||
|
|
||||||
return Path(f"data/{base}.json")
|
return get_data_path_manager().get_path(f"{base}.json")
|
||||||
|
|
||||||
|
|
||||||
def load_cached_plan(cache_file, max_age_days=DEFAULT_CACHE_EXPIRATION_DAYS):
|
def load_cached_plan(cache_file, max_age_days=DEFAULT_CACHE_EXPIRATION_DAYS):
|
||||||
|
|||||||
@ -11,11 +11,15 @@ import re
|
|||||||
from typing import Dict, List, Optional, Tuple, Any
|
from typing import Dict, List, Optional, Tuple, Any
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from karaoke_downloader.data_path_manager import get_data_path_manager
|
||||||
|
|
||||||
|
|
||||||
class ChannelParser:
|
class ChannelParser:
|
||||||
"""Handles channel-specific parsing of video titles to extract artist and title."""
|
"""Handles channel-specific parsing of video titles to extract artist and title."""
|
||||||
|
|
||||||
def __init__(self, channels_file: str = "data/channels.json"):
|
def __init__(self, channels_file: str = None):
|
||||||
|
if channels_file is None:
|
||||||
|
channels_file = str(get_data_path_manager().get_channels_json_path())
|
||||||
"""Initialize the parser with channel configuration."""
|
"""Initialize the parser with channel configuration."""
|
||||||
self.channels_file = Path(channels_file)
|
self.channels_file = Path(channels_file)
|
||||||
self.channels_config = self._load_channels_config()
|
self.channels_config = self._load_channels_config()
|
||||||
@ -238,7 +242,9 @@ class ChannelParser:
|
|||||||
|
|
||||||
|
|
||||||
# Convenience function for backward compatibility
|
# Convenience function for backward compatibility
|
||||||
def extract_artist_title(video_title: str, channel_name: str, channels_file: str = "data/channels.json") -> Tuple[str, str]:
|
def extract_artist_title(video_title: str, channel_name: str, channels_file: str = None) -> Tuple[str, str]:
|
||||||
|
if channels_file is None:
|
||||||
|
channels_file = str(get_data_path_manager().get_channels_json_path())
|
||||||
"""
|
"""
|
||||||
Convenience function to extract artist and title from a video title.
|
Convenience function to extract artist and title from a video title.
|
||||||
|
|
||||||
|
|||||||
@ -12,6 +12,7 @@ from typing import List
|
|||||||
|
|
||||||
from karaoke_downloader.channel_parser import ChannelParser
|
from karaoke_downloader.channel_parser import ChannelParser
|
||||||
from karaoke_downloader.config_manager import AppConfig
|
from karaoke_downloader.config_manager import AppConfig
|
||||||
|
from karaoke_downloader.data_path_manager import get_data_path_manager
|
||||||
from karaoke_downloader.downloader import KaraokeDownloader
|
from karaoke_downloader.downloader import KaraokeDownloader
|
||||||
|
|
||||||
# Constants
|
# Constants
|
||||||
@ -19,16 +20,19 @@ DEFAULT_LATEST_PER_CHANNEL_LIMIT = 10
|
|||||||
DEFAULT_FUZZY_THRESHOLD = 85
|
DEFAULT_FUZZY_THRESHOLD = 85
|
||||||
|
|
||||||
|
|
||||||
def load_channels_from_json(channels_file: str = "data/channels.json") -> List[str]:
|
def load_channels_from_json(channels_file: str = None) -> List[str]:
|
||||||
"""
|
"""
|
||||||
Load channel URLs from the new JSON format.
|
Load channel URLs from the new JSON format.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
channels_file: Path to the channels.json file
|
channels_file: Path to the channels.json file (if None, uses default from config)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of channel URLs
|
List of channel URLs
|
||||||
"""
|
"""
|
||||||
|
if channels_file is None:
|
||||||
|
channels_file = str(get_data_path_manager().get_channels_json_path())
|
||||||
|
|
||||||
try:
|
try:
|
||||||
parser = ChannelParser(channels_file)
|
parser = ChannelParser(channels_file)
|
||||||
channels = parser.channels_config.get("channels", [])
|
channels = parser.channels_config.get("channels", [])
|
||||||
@ -38,16 +42,19 @@ def load_channels_from_json(channels_file: str = "data/channels.json") -> List[s
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
def load_channels_from_text(channels_file: str = "data/channels.txt") -> List[str]:
|
def load_channels_from_text(channels_file: str = None) -> List[str]:
|
||||||
"""
|
"""
|
||||||
Load channel URLs from the old text format (for backward compatibility).
|
Load channel URLs from the old text format (for backward compatibility).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
channels_file: Path to the channels.txt file
|
channels_file: Path to the channels.txt file (if None, uses default from config)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of channel URLs
|
List of channel URLs
|
||||||
"""
|
"""
|
||||||
|
if channels_file is None:
|
||||||
|
channels_file = str(get_data_path_manager().get_channels_txt_path())
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with open(channels_file, "r", encoding="utf-8") as f:
|
with open(channels_file, "r", encoding="utf-8") as f:
|
||||||
return [
|
return [
|
||||||
@ -63,11 +70,10 @@ def load_channels_from_text(channels_file: str = "data/channels.txt") -> List[st
|
|||||||
def load_channels(channel_file: str = None) -> List[str]:
|
def load_channels(channel_file: str = None) -> List[str]:
|
||||||
"""Load channel URLs from file."""
|
"""Load channel URLs from file."""
|
||||||
if channel_file is None:
|
if channel_file is None:
|
||||||
# Try JSON first, then fall back to text
|
# Use JSON configuration
|
||||||
if os.path.exists("data/channels.json"):
|
data_path_manager = get_data_path_manager()
|
||||||
return load_channels_from_json("data/channels.json")
|
if data_path_manager.file_exists("channels.json"):
|
||||||
elif os.path.exists("data/channels.txt"):
|
return load_channels_from_json()
|
||||||
return load_channels_from_text("data/channels.txt")
|
|
||||||
else:
|
else:
|
||||||
return []
|
return []
|
||||||
else:
|
else:
|
||||||
@ -176,7 +182,7 @@ Examples:
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--songlist-priority",
|
"--songlist-priority",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="Prioritize downloads based on data/songList.json (default: enabled)",
|
help="Prioritize downloads based on songList.json in the data directory (default: enabled)",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--no-songlist-priority",
|
"--no-songlist-priority",
|
||||||
@ -218,7 +224,7 @@ Examples:
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--songlist-file",
|
"--songlist-file",
|
||||||
metavar="FILE_PATH",
|
metavar="FILE_PATH",
|
||||||
help="Custom songlist file path to use with --songlist-focus (default: data/songList.json)",
|
help="Custom songlist file path to use with --songlist-focus (default: songList.json in the data directory)",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--force",
|
"--force",
|
||||||
@ -299,7 +305,7 @@ Examples:
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--manual",
|
"--manual",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="Download from manual videos collection (data/manual_videos.json)",
|
help="Download from manual videos collection (manual_videos.json in the data directory)",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--channel-focus",
|
"--channel-focus",
|
||||||
@ -421,7 +427,7 @@ Examples:
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
print(
|
print(
|
||||||
"❌ No URL, --file, or channel configuration found. Please provide a channel URL or create data/channels.json."
|
"❌ No URL, --file, or channel configuration found. Please provide a channel URL or create channels.json in the data directory."
|
||||||
)
|
)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
# --- END NEW ---
|
# --- END NEW ---
|
||||||
@ -617,7 +623,7 @@ Examples:
|
|||||||
channel_urls = load_channels(args.file)
|
channel_urls = load_channels(args.file)
|
||||||
if not channel_urls:
|
if not channel_urls:
|
||||||
print(f"❌ No channels found in configuration")
|
print(f"❌ No channels found in configuration")
|
||||||
print("Please provide a channel URL or create data/channels.json")
|
print("Please provide a channel URL or create channels.json in the data directory")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
limit = args.limit if args.limit else DEFAULT_LATEST_PER_CHANNEL_LIMIT
|
limit = args.limit if args.limit else DEFAULT_LATEST_PER_CHANNEL_LIMIT
|
||||||
force_refresh_download_plan = (
|
force_refresh_download_plan = (
|
||||||
@ -649,7 +655,7 @@ Examples:
|
|||||||
# Load songlist based on focus mode
|
# Load songlist based on focus mode
|
||||||
if args.songlist_focus:
|
if args.songlist_focus:
|
||||||
# Load focused playlists
|
# Load focused playlists
|
||||||
songlist_file_path = args.songlist_file if args.songlist_file else "data/songList.json"
|
songlist_file_path = args.songlist_file if args.songlist_file else str(get_data_path_manager().get_songlist_path())
|
||||||
songlist_file = Path(songlist_file_path)
|
songlist_file = Path(songlist_file_path)
|
||||||
if not songlist_file.exists():
|
if not songlist_file.exists():
|
||||||
print(f"⚠️ Songlist file not found: {songlist_file_path}")
|
print(f"⚠️ Songlist file not found: {songlist_file_path}")
|
||||||
@ -697,12 +703,12 @@ Examples:
|
|||||||
songlist = []
|
songlist = []
|
||||||
else:
|
else:
|
||||||
# Load all songs from songlist
|
# Load all songs from songlist
|
||||||
songlist_path = args.songlist_file if args.songlist_file else "data/songList.json"
|
songlist_path = args.songlist_file if args.songlist_file else str(get_data_path_manager().get_songlist_path())
|
||||||
songlist = load_songlist(songlist_path)
|
songlist = load_songlist(songlist_path)
|
||||||
|
|
||||||
if songlist:
|
if songlist:
|
||||||
# Load channel URLs
|
# Load channel URLs
|
||||||
channel_file = args.file if args.file else "data/channels.txt"
|
channel_file = args.file if args.file else str(get_data_path_manager().get_channels_txt_path())
|
||||||
if os.path.exists(channel_file):
|
if os.path.exists(channel_file):
|
||||||
with open(channel_file, "r", encoding='utf-8') as f:
|
with open(channel_file, "r", encoding='utf-8') as f:
|
||||||
channel_urls = [
|
channel_urls = [
|
||||||
|
|||||||
@ -36,6 +36,7 @@ DEFAULT_CONFIG = {
|
|||||||
"folder_structure": {
|
"folder_structure": {
|
||||||
"downloads_dir": "downloads",
|
"downloads_dir": "downloads",
|
||||||
"logs_dir": "logs",
|
"logs_dir": "logs",
|
||||||
|
"data_dir": "data",
|
||||||
"tracking_file": "data/karaoke_tracking.json",
|
"tracking_file": "data/karaoke_tracking.json",
|
||||||
},
|
},
|
||||||
"logging": {
|
"logging": {
|
||||||
@ -135,6 +136,7 @@ class FolderStructure:
|
|||||||
|
|
||||||
downloads_dir: str = "downloads"
|
downloads_dir: str = "downloads"
|
||||||
logs_dir: str = "logs"
|
logs_dir: str = "logs"
|
||||||
|
data_dir: str = "data"
|
||||||
tracking_file: str = "data/karaoke_tracking.json"
|
tracking_file: str = "data/karaoke_tracking.json"
|
||||||
|
|
||||||
|
|
||||||
@ -165,14 +167,21 @@ class ConfigManager:
|
|||||||
Manages application configuration with loading, validation, and caching.
|
Manages application configuration with loading, validation, and caching.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, config_file: Union[str, Path] = "data/config.json"):
|
def __init__(self, config_file: Union[str, Path] = "config/config.json", data_dir: Optional[str] = None):
|
||||||
"""
|
"""
|
||||||
Initialize the configuration manager.
|
Initialize the configuration manager.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
config_file: Path to the configuration file
|
config_file: Path to the configuration file
|
||||||
|
data_dir: Optional custom data directory path
|
||||||
"""
|
"""
|
||||||
|
# If config_file is relative and data_dir is provided, make it relative to data_dir
|
||||||
|
if data_dir and not Path(config_file).is_absolute():
|
||||||
|
self.config_file = Path(data_dir) / config_file
|
||||||
|
else:
|
||||||
self.config_file = Path(config_file)
|
self.config_file = Path(config_file)
|
||||||
|
|
||||||
|
self._data_dir = data_dir
|
||||||
self._config: Optional[AppConfig] = None
|
self._config: Optional[AppConfig] = None
|
||||||
self._last_modified: Optional[datetime] = None
|
self._last_modified: Optional[datetime] = None
|
||||||
|
|
||||||
@ -333,27 +342,35 @@ class ConfigManager:
|
|||||||
_config_manager: Optional[ConfigManager] = None
|
_config_manager: Optional[ConfigManager] = None
|
||||||
|
|
||||||
|
|
||||||
def get_config_manager() -> ConfigManager:
|
def get_config_manager(config_file: Optional[Union[str, Path]] = None, data_dir: Optional[str] = None) -> ConfigManager:
|
||||||
"""
|
"""
|
||||||
Get the global configuration manager instance.
|
Get the global configuration manager instance.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
config_file: Optional path to config file (default: "config/config.json")
|
||||||
|
data_dir: Optional custom data directory path
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
ConfigManager instance
|
ConfigManager instance
|
||||||
"""
|
"""
|
||||||
global _config_manager
|
global _config_manager
|
||||||
if _config_manager is None:
|
if _config_manager is None or config_file is not None or data_dir is not None:
|
||||||
_config_manager = ConfigManager()
|
if config_file is None:
|
||||||
|
config_file = "config/config.json"
|
||||||
|
_config_manager = ConfigManager(config_file, data_dir)
|
||||||
return _config_manager
|
return _config_manager
|
||||||
|
|
||||||
|
|
||||||
def load_config(force_reload: bool = False) -> AppConfig:
|
def load_config(force_reload: bool = False, config_file: Optional[Union[str, Path]] = None, data_dir: Optional[str] = None) -> AppConfig:
|
||||||
"""
|
"""
|
||||||
Load configuration using the global manager.
|
Load configuration using the global manager.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
force_reload: Force reload even if file hasn't changed
|
force_reload: Force reload even if file hasn't changed
|
||||||
|
config_file: Optional path to config file (default: "config/config.json")
|
||||||
|
data_dir: Optional custom data directory path
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
AppConfig instance
|
AppConfig instance
|
||||||
"""
|
"""
|
||||||
return get_config_manager().load_config(force_reload)
|
return get_config_manager(config_file, data_dir).load_config(force_reload)
|
||||||
|
|||||||
184
karaoke_downloader/data_path_manager.py
Normal file
184
karaoke_downloader/data_path_manager.py
Normal file
@ -0,0 +1,184 @@
|
|||||||
|
"""
|
||||||
|
Data path management utilities for the karaoke downloader.
|
||||||
|
Provides centralized data directory path management and file path resolution.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from .config_manager import get_config_manager
|
||||||
|
|
||||||
|
|
||||||
|
class DataPathManager:
|
||||||
|
"""
|
||||||
|
Manages data directory paths and provides utilities for resolving file paths
|
||||||
|
relative to the configured data directory.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, data_dir: Optional[str] = None):
|
||||||
|
"""
|
||||||
|
Initialize the data path manager.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
data_dir: Optional custom data directory path. If None, uses config.
|
||||||
|
"""
|
||||||
|
self._data_dir = data_dir
|
||||||
|
|
||||||
|
# If a custom data directory is provided, look for config.json in that directory
|
||||||
|
if data_dir:
|
||||||
|
config_file = Path(data_dir) / "config.json"
|
||||||
|
self._config_manager = get_config_manager(str(config_file))
|
||||||
|
else:
|
||||||
|
# Otherwise, use the default config file (config/config.json)
|
||||||
|
self._config_manager = get_config_manager()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def data_dir(self) -> Path:
|
||||||
|
"""
|
||||||
|
Get the configured data directory path.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Path to the data directory
|
||||||
|
"""
|
||||||
|
if self._data_dir:
|
||||||
|
return Path(self._data_dir)
|
||||||
|
|
||||||
|
# Get from config
|
||||||
|
config = self._config_manager.get_config()
|
||||||
|
data_dir = getattr(config.folder_structure, 'data_dir', 'data')
|
||||||
|
return Path(data_dir)
|
||||||
|
|
||||||
|
def get_path(self, filename: str) -> Path:
|
||||||
|
"""
|
||||||
|
Get the full path to a file in the data directory.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
filename: Name of the file (e.g., 'config.json', 'channels.json')
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Full path to the file
|
||||||
|
"""
|
||||||
|
return self.data_dir / filename
|
||||||
|
|
||||||
|
def get_channels_json_path(self) -> Path:
|
||||||
|
"""Get path to channels.json file."""
|
||||||
|
return self.get_path('channels.json')
|
||||||
|
|
||||||
|
def get_channels_txt_path(self) -> Path:
|
||||||
|
"""Get path to channels.txt file."""
|
||||||
|
return self.get_path('channels.txt')
|
||||||
|
|
||||||
|
def get_songlist_path(self) -> Path:
|
||||||
|
"""Get path to songList.json file."""
|
||||||
|
return self.get_path('songList.json')
|
||||||
|
|
||||||
|
def get_songlist_tracking_path(self) -> Path:
|
||||||
|
"""Get path to songlist_tracking.json file."""
|
||||||
|
return self.get_path('songlist_tracking.json')
|
||||||
|
|
||||||
|
def get_karaoke_tracking_path(self) -> Path:
|
||||||
|
"""Get path to karaoke_tracking.json file."""
|
||||||
|
return self.get_path('karaoke_tracking.json')
|
||||||
|
|
||||||
|
def get_server_duplicates_tracking_path(self) -> Path:
|
||||||
|
"""Get path to server_duplicates_tracking.json file."""
|
||||||
|
return self.get_path('server_duplicates_tracking.json')
|
||||||
|
|
||||||
|
def get_manual_videos_path(self) -> Path:
|
||||||
|
"""Get path to manual_videos.json file."""
|
||||||
|
return self.get_path('manual_videos.json')
|
||||||
|
|
||||||
|
def get_songs_path(self) -> Path:
|
||||||
|
"""Get path to songs.json file."""
|
||||||
|
return self.get_path('songs.json')
|
||||||
|
|
||||||
|
def get_channel_cache_dir(self) -> Path:
|
||||||
|
"""Get path to channel_cache directory."""
|
||||||
|
return self.get_path('channel_cache')
|
||||||
|
|
||||||
|
def get_channel_cache_path(self, channel_id: str) -> Path:
|
||||||
|
"""Get path to a specific channel cache file."""
|
||||||
|
return self.get_channel_cache_dir() / f"{channel_id}.json"
|
||||||
|
|
||||||
|
def get_download_plan_cache_path(self, plan_name: str, **kwargs) -> Path:
|
||||||
|
"""Get path to download plan cache file."""
|
||||||
|
# Create a hash from kwargs for unique cache files
|
||||||
|
import hashlib
|
||||||
|
if kwargs:
|
||||||
|
kwargs_str = str(sorted(kwargs.items()))
|
||||||
|
hash_suffix = hashlib.md5(kwargs_str.encode()).hexdigest()[:8]
|
||||||
|
plan_name = f"{plan_name}_{hash_suffix}"
|
||||||
|
return self.get_path(f"plan_latest_per_channel_{plan_name}.json")
|
||||||
|
|
||||||
|
def get_unmatched_report_path(self, timestamp: Optional[str] = None) -> Path:
|
||||||
|
"""Get path to unmatched songs report file."""
|
||||||
|
if timestamp:
|
||||||
|
return self.get_path(f"unmatched_songs_report_{timestamp}.json")
|
||||||
|
return self.get_path("unmatched_songs_report.json")
|
||||||
|
|
||||||
|
def ensure_data_dir_exists(self) -> None:
|
||||||
|
"""Ensure the data directory exists."""
|
||||||
|
self.data_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
def list_data_files(self) -> list:
|
||||||
|
"""List all files in the data directory."""
|
||||||
|
if not self.data_dir.exists():
|
||||||
|
return []
|
||||||
|
|
||||||
|
files = []
|
||||||
|
for file_path in self.data_dir.iterdir():
|
||||||
|
if file_path.is_file():
|
||||||
|
files.append(file_path.name)
|
||||||
|
return sorted(files)
|
||||||
|
|
||||||
|
def file_exists(self, filename: str) -> bool:
|
||||||
|
"""Check if a file exists in the data directory."""
|
||||||
|
return self.get_path(filename).exists()
|
||||||
|
|
||||||
|
|
||||||
|
# Shared DataPathManager instance, created on first use by get_data_path_manager()
_data_path_manager: Optional[DataPathManager] = None


def get_data_path_manager(data_dir: Optional[str] = None) -> DataPathManager:
    """
    Return the shared data path manager, building it when required.

    A new manager is built (and replaces the shared one) whenever
    ``data_dir`` is not None, or when no manager has been created yet.

    Args:
        data_dir: Optional custom data directory path

    Returns:
        DataPathManager instance
    """
    global _data_path_manager

    # Fast path: nothing requested explicitly and an instance already exists.
    if data_dir is None and _data_path_manager is not None:
        return _data_path_manager

    _data_path_manager = DataPathManager(data_dir)
    return _data_path_manager
|
||||||
|
|
||||||
|
|
||||||
|
def get_data_path(filename: str, data_dir: Optional[str] = None) -> Path:
    """
    Resolve ``filename`` inside the data directory.

    Convenience wrapper: obtains the manager via
    ``get_data_path_manager(data_dir)`` and delegates to its ``get_path``.

    Args:
        filename: Name of the file
        data_dir: Optional custom data directory path

    Returns:
        Full path to the file
    """
    manager = get_data_path_manager(data_dir)
    resolved = manager.get_path(filename)
    return resolved
|
||||||
|
|
||||||
|
|
||||||
|
def get_data_dir(data_dir: Optional[str] = None) -> Path:
    """
    Return the data directory path.

    Convenience wrapper: obtains the manager via
    ``get_data_path_manager(data_dir)`` and reads its ``data_dir`` property.

    Args:
        data_dir: Optional custom data directory path

    Returns:
        Path to the data directory
    """
    manager = get_data_path_manager(data_dir)
    return manager.data_dir
|
||||||
@ -27,6 +27,7 @@ from karaoke_downloader.fuzzy_matcher import (
|
|||||||
normalize_title,
|
normalize_title,
|
||||||
)
|
)
|
||||||
from karaoke_downloader.channel_parser import ChannelParser
|
from karaoke_downloader.channel_parser import ChannelParser
|
||||||
|
from karaoke_downloader.data_path_manager import get_data_path_manager
|
||||||
from karaoke_downloader.youtube_utils import get_channel_info
|
from karaoke_downloader.youtube_utils import get_channel_info
|
||||||
|
|
||||||
# Constants
|
# Constants
|
||||||
@ -50,7 +51,7 @@ def generate_unmatched_report(unmatched: List[Dict[str, Any]], report_path: str
|
|||||||
"""
|
"""
|
||||||
if report_path is None:
|
if report_path is None:
|
||||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||||
report_path = f"data/unmatched_songs_report_{timestamp}.json"
|
report_path = str(get_data_path_manager().get_unmatched_report_path(timestamp))
|
||||||
|
|
||||||
report_data = {
|
report_data = {
|
||||||
"generated_at": datetime.now().isoformat(),
|
"generated_at": datetime.now().isoformat(),
|
||||||
|
|||||||
@ -21,6 +21,7 @@ from karaoke_downloader.channel_manager import (
|
|||||||
reset_channel_downloads,
|
reset_channel_downloads,
|
||||||
)
|
)
|
||||||
from karaoke_downloader.config_manager import get_config_manager, load_config
|
from karaoke_downloader.config_manager import get_config_manager, load_config
|
||||||
|
from karaoke_downloader.data_path_manager import get_data_path_manager
|
||||||
from karaoke_downloader.download_pipeline import DownloadPipeline
|
from karaoke_downloader.download_pipeline import DownloadPipeline
|
||||||
from karaoke_downloader.download_planner import build_download_plan
|
from karaoke_downloader.download_planner import build_download_plan
|
||||||
from karaoke_downloader.error_utils import handle_yt_dlp_error, log_error
|
from karaoke_downloader.error_utils import handle_yt_dlp_error, log_error
|
||||||
@ -89,8 +90,9 @@ class KaraokeDownloader:
|
|||||||
ensure_directory_exists(self.logs_dir)
|
ensure_directory_exists(self.logs_dir)
|
||||||
|
|
||||||
# Initialize tracking
|
# Initialize tracking
|
||||||
tracking_file = DATA_DIR / "karaoke_tracking.json"
|
data_path_manager = get_data_path_manager()
|
||||||
cache_dir = DATA_DIR / "channel_cache"
|
tracking_file = data_path_manager.get_karaoke_tracking_path()
|
||||||
|
cache_dir = data_path_manager.get_channel_cache_dir()
|
||||||
self.tracker = TrackingManager(
|
self.tracker = TrackingManager(
|
||||||
tracking_file=tracking_file, cache_dir=cache_dir
|
tracking_file=tracking_file, cache_dir=cache_dir
|
||||||
)
|
)
|
||||||
@ -123,7 +125,7 @@ class KaraokeDownloader:
|
|||||||
self.dry_run = False
|
self.dry_run = False
|
||||||
self.download_limit = None
|
self.download_limit = None
|
||||||
self.force_download = False
|
self.force_download = False
|
||||||
self.songlist_file_path = "data/songList.json" # Default songlist file path
|
self.songlist_file_path = str(get_data_path_manager().get_songlist_path()) # Default songlist file path
|
||||||
|
|
||||||
def _load_config(self):
|
def _load_config(self):
|
||||||
"""Load configuration using the config manager."""
|
"""Load configuration using the config manager."""
|
||||||
@ -1179,8 +1181,9 @@ def reset_songlist_all():
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
# Load songlist tracking
|
# Load songlist tracking
|
||||||
songlist_tracking_file = Path("data/songlist_tracking.json")
|
data_path_manager = get_data_path_manager()
|
||||||
karaoke_tracking_file = Path("data/karaoke_tracking.json")
|
songlist_tracking_file = data_path_manager.get_songlist_tracking_path()
|
||||||
|
karaoke_tracking_file = data_path_manager.get_karaoke_tracking_path()
|
||||||
if songlist_tracking_file.exists():
|
if songlist_tracking_file.exists():
|
||||||
with open(songlist_tracking_file, "r", encoding="utf-8") as f:
|
with open(songlist_tracking_file, "r", encoding="utf-8") as f:
|
||||||
tracking = json.load(f)
|
tracking = json.load(f)
|
||||||
|
|||||||
@ -6,7 +6,11 @@ import json
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict, List, Optional, Any
|
from typing import Dict, List, Optional, Any
|
||||||
|
|
||||||
def load_manual_videos(manual_file: str = "data/manual_videos.json") -> List[Dict[str, Any]]:
|
from karaoke_downloader.data_path_manager import get_data_path_manager
|
||||||
|
|
||||||
|
def load_manual_videos(manual_file: str = None) -> List[Dict[str, Any]]:
|
||||||
|
if manual_file is None:
|
||||||
|
manual_file = str(get_data_path_manager().get_manual_videos_path())
|
||||||
"""
|
"""
|
||||||
Load manual videos from the JSON file.
|
Load manual videos from the JSON file.
|
||||||
|
|
||||||
@ -34,7 +38,9 @@ def load_manual_videos(manual_file: str = "data/manual_videos.json") -> List[Dic
|
|||||||
print(f"❌ Error loading manual videos: {e}")
|
print(f"❌ Error loading manual videos: {e}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def get_manual_videos_for_channel(channel_name: str, manual_file: str = "data/manual_videos.json") -> List[Dict[str, Any]]:
|
def get_manual_videos_for_channel(channel_name: str, manual_file: str = None) -> List[Dict[str, Any]]:
|
||||||
|
if manual_file is None:
|
||||||
|
manual_file = str(get_data_path_manager().get_manual_videos_path())
|
||||||
"""
|
"""
|
||||||
Get manual videos for a specific channel.
|
Get manual videos for a specific channel.
|
||||||
|
|
||||||
|
|||||||
@ -7,8 +7,12 @@ import json
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from karaoke_downloader.data_path_manager import get_data_path_manager
|
||||||
|
|
||||||
def load_server_songs(songs_path="data/songs.json"):
|
|
||||||
|
def load_server_songs(songs_path=None):
|
||||||
|
if songs_path is None:
|
||||||
|
songs_path = str(get_data_path_manager().get_songs_path())
|
||||||
"""Load the list of songs already available on the server with format information."""
|
"""Load the list of songs already available on the server with format information."""
|
||||||
songs_file = Path(songs_path)
|
songs_file = Path(songs_path)
|
||||||
if not songs_file.exists():
|
if not songs_file.exists():
|
||||||
@ -59,8 +63,10 @@ def should_skip_server_song(server_songs, artist, title):
|
|||||||
|
|
||||||
|
|
||||||
def load_server_duplicates_tracking(
|
def load_server_duplicates_tracking(
|
||||||
tracking_path="data/server_duplicates_tracking.json",
|
tracking_path=None,
|
||||||
):
|
):
|
||||||
|
if tracking_path is None:
|
||||||
|
tracking_path = str(get_data_path_manager().get_server_duplicates_tracking_path())
|
||||||
"""Load the tracking of songs found to be duplicates on the server."""
|
"""Load the tracking of songs found to be duplicates on the server."""
|
||||||
tracking_file = Path(tracking_path)
|
tracking_file = Path(tracking_path)
|
||||||
if not tracking_file.exists():
|
if not tracking_file.exists():
|
||||||
@ -74,8 +80,10 @@ def load_server_duplicates_tracking(
|
|||||||
|
|
||||||
|
|
||||||
def save_server_duplicates_tracking(
|
def save_server_duplicates_tracking(
|
||||||
tracking, tracking_path="data/server_duplicates_tracking.json"
|
tracking, tracking_path=None
|
||||||
):
|
):
|
||||||
|
if tracking_path is None:
|
||||||
|
tracking_path = str(get_data_path_manager().get_server_duplicates_tracking_path())
|
||||||
"""Save the tracking of songs found to be duplicates on the server."""
|
"""Save the tracking of songs found to be duplicates on the server."""
|
||||||
try:
|
try:
|
||||||
with open(tracking_path, "w", encoding="utf-8") as f:
|
with open(tracking_path, "w", encoding="utf-8") as f:
|
||||||
|
|||||||
@ -4,11 +4,15 @@ from pathlib import Path
|
|||||||
from typing import List, Dict, Any, Optional
|
from typing import List, Dict, Any, Optional
|
||||||
from mutagen.mp4 import MP4
|
from mutagen.mp4 import MP4
|
||||||
|
|
||||||
|
from karaoke_downloader.data_path_manager import get_data_path_manager
|
||||||
|
|
||||||
|
|
||||||
class SongListGenerator:
|
class SongListGenerator:
|
||||||
"""Utility class for generating song lists from MP4 files with ID3 tags."""
|
"""Utility class for generating song lists from MP4 files with ID3 tags."""
|
||||||
|
|
||||||
def __init__(self, songlist_path: str = "data/songList.json"):
|
def __init__(self, songlist_path: str = None):
|
||||||
|
if songlist_path is None:
|
||||||
|
songlist_path = str(get_data_path_manager().get_songlist_path())
|
||||||
self.songlist_path = Path(songlist_path)
|
self.songlist_path = Path(songlist_path)
|
||||||
self.songlist_path.parent.mkdir(parents=True, exist_ok=True)
|
self.songlist_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
@ -239,8 +243,8 @@ Examples:
|
|||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--songlist-path",
|
"--songlist-path",
|
||||||
default="data/songList.json",
|
default=None,
|
||||||
help="Path to the song list JSON file (default: data/songList.json)"
|
help="Path to the song list JSON file (default: songList.json in the data directory)"
|
||||||
)
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|||||||
@ -7,6 +7,7 @@ import json
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from karaoke_downloader.data_path_manager import get_data_path_manager
|
||||||
from karaoke_downloader.server_manager import (
|
from karaoke_downloader.server_manager import (
|
||||||
check_and_mark_server_duplicate,
|
check_and_mark_server_duplicate,
|
||||||
is_song_marked_as_server_duplicate,
|
is_song_marked_as_server_duplicate,
|
||||||
@ -16,7 +17,9 @@ from karaoke_downloader.server_manager import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def load_songlist(songlist_path="data/songList.json"):
|
def load_songlist(songlist_path=None):
|
||||||
|
if songlist_path is None:
|
||||||
|
songlist_path = str(get_data_path_manager().get_songlist_path())
|
||||||
songlist_file = Path(songlist_path)
|
songlist_file = Path(songlist_path)
|
||||||
if not songlist_file.exists():
|
if not songlist_file.exists():
|
||||||
print(f"⚠️ Songlist file not found: {songlist_path}")
|
print(f"⚠️ Songlist file not found: {songlist_path}")
|
||||||
@ -55,7 +58,9 @@ def normalize_title(title):
|
|||||||
return " ".join(normalized.split()).lower()
|
return " ".join(normalized.split()).lower()
|
||||||
|
|
||||||
|
|
||||||
def load_songlist_tracking(tracking_path="data/songlist_tracking.json"):
|
def load_songlist_tracking(tracking_path=None):
|
||||||
|
if tracking_path is None:
|
||||||
|
tracking_path = str(get_data_path_manager().get_songlist_tracking_path())
|
||||||
tracking_file = Path(tracking_path)
|
tracking_file = Path(tracking_path)
|
||||||
if not tracking_file.exists():
|
if not tracking_file.exists():
|
||||||
return {}
|
return {}
|
||||||
@ -67,7 +72,9 @@ def load_songlist_tracking(tracking_path="data/songlist_tracking.json"):
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
def save_songlist_tracking(tracking, tracking_path="data/songlist_tracking.json"):
|
def save_songlist_tracking(tracking, tracking_path=None):
|
||||||
|
if tracking_path is None:
|
||||||
|
tracking_path = str(get_data_path_manager().get_songlist_tracking_path())
|
||||||
try:
|
try:
|
||||||
with open(tracking_path, "w", encoding="utf-8") as f:
|
with open(tracking_path, "w", encoding="utf-8") as f:
|
||||||
json.dump(tracking, f, indent=2, ensure_ascii=False)
|
json.dump(tracking, f, indent=2, ensure_ascii=False)
|
||||||
|
|||||||
@ -6,6 +6,8 @@ from enum import Enum
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List, Optional, Tuple
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
from karaoke_downloader.data_path_manager import get_data_path_manager
|
||||||
|
|
||||||
class SongStatus(str, Enum):
|
class SongStatus(str, Enum):
|
||||||
NOT_DOWNLOADED = "NOT_DOWNLOADED"
|
NOT_DOWNLOADED = "NOT_DOWNLOADED"
|
||||||
DOWNLOADING = "DOWNLOADING"
|
DOWNLOADING = "DOWNLOADING"
|
||||||
@ -25,9 +27,14 @@ class FormatType(str, Enum):
|
|||||||
class TrackingManager:
|
class TrackingManager:
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
tracking_file="data/karaoke_tracking.json",
|
tracking_file=None,
|
||||||
cache_dir="data/channel_cache",
|
cache_dir=None,
|
||||||
):
|
):
|
||||||
|
if tracking_file is None:
|
||||||
|
tracking_file = str(get_data_path_manager().get_karaoke_tracking_path())
|
||||||
|
if cache_dir is None:
|
||||||
|
cache_dir = str(get_data_path_manager().get_channel_cache_dir())
|
||||||
|
|
||||||
self.tracking_file = Path(tracking_file)
|
self.tracking_file = Path(tracking_file)
|
||||||
self.cache_dir = Path(cache_dir)
|
self.cache_dir = Path(cache_dir)
|
||||||
|
|
||||||
|
|||||||
@ -8,6 +8,8 @@ import re
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict, List, Optional
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
|
from karaoke_downloader.data_path_manager import get_data_path_manager
|
||||||
|
|
||||||
def extract_video_id(url: str) -> Optional[str]:
|
def extract_video_id(url: str) -> Optional[str]:
|
||||||
"""Extract video ID from YouTube URL."""
|
"""Extract video ID from YouTube URL."""
|
||||||
patterns = [
|
patterns = [
|
||||||
@ -21,7 +23,9 @@ def extract_video_id(url: str) -> Optional[str]:
|
|||||||
return match.group(1)
|
return match.group(1)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def add_manual_video(title: str, url: str, manual_file: str = "data/manual_videos.json"):
|
def add_manual_video(title: str, url: str, manual_file: str = None):
|
||||||
|
if manual_file is None:
|
||||||
|
manual_file = str(get_data_path_manager().get_manual_videos_path())
|
||||||
"""
|
"""
|
||||||
Add a manual video to the collection.
|
Add a manual video to the collection.
|
||||||
|
|
||||||
@ -88,7 +92,9 @@ def add_manual_video(title: str, url: str, manual_file: str = "data/manual_video
|
|||||||
print(f" ID: {video_id}")
|
print(f" ID: {video_id}")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def list_manual_videos(manual_file: str = "data/manual_videos.json"):
|
def list_manual_videos(manual_file: str = None):
|
||||||
|
if manual_file is None:
|
||||||
|
manual_file = str(get_data_path_manager().get_manual_videos_path())
|
||||||
"""List all manual videos."""
|
"""List all manual videos."""
|
||||||
manual_path = Path(manual_file)
|
manual_path = Path(manual_file)
|
||||||
|
|
||||||
@ -108,7 +114,9 @@ def list_manual_videos(manual_file: str = "data/manual_videos.json"):
|
|||||||
print(f" ID: {video['id']}")
|
print(f" ID: {video['id']}")
|
||||||
print()
|
print()
|
||||||
|
|
||||||
def remove_manual_video(video_id: str, manual_file: str = "data/manual_videos.json"):
|
def remove_manual_video(video_id: str, manual_file: str = None):
|
||||||
|
if manual_file is None:
|
||||||
|
manual_file = str(get_data_path_manager().get_manual_videos_path())
|
||||||
"""Remove a manual video by ID."""
|
"""Remove a manual video by ID."""
|
||||||
manual_path = Path(manual_file)
|
manual_path = Path(manual_file)
|
||||||
|
|
||||||
@ -9,6 +9,8 @@ import re
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from karaoke_downloader.data_path_manager import get_data_path_manager
|
||||||
|
|
||||||
def parse_raw_output_file(raw_file_path):
|
def parse_raw_output_file(raw_file_path):
|
||||||
"""Parse the raw output file and extract valid videos."""
|
"""Parse the raw output file and extract valid videos."""
|
||||||
videos = []
|
videos = []
|
||||||
@ -73,7 +75,9 @@ def parse_raw_output_file(raw_file_path):
|
|||||||
|
|
||||||
return videos
|
return videos
|
||||||
|
|
||||||
def save_cache_file(channel_id, videos, cache_dir="data/channel_cache"):
|
def save_cache_file(channel_id, videos, cache_dir=None):
|
||||||
|
if cache_dir is None:
|
||||||
|
cache_dir = str(get_data_path_manager().get_channel_cache_dir())
|
||||||
"""Save the parsed videos to a cache file."""
|
"""Save the parsed videos to a cache file."""
|
||||||
cache_dir = Path(cache_dir)
|
cache_dir = Path(cache_dir)
|
||||||
cache_dir.mkdir(parents=True, exist_ok=True)
|
cache_dir.mkdir(parents=True, exist_ok=True)
|
||||||
@ -97,7 +101,8 @@ def save_cache_file(channel_id, videos, cache_dir="data/channel_cache"):
|
|||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main function to build cache from raw output."""
|
"""Main function to build cache from raw output."""
|
||||||
raw_file_path = Path("data/channel_cache/@VocalStarKaraoke_raw_output.txt")
|
data_path_manager = get_data_path_manager()
|
||||||
|
raw_file_path = data_path_manager.get_channel_cache_dir() / "@VocalStarKaraoke_raw_output.txt"
|
||||||
|
|
||||||
if not raw_file_path.exists():
|
if not raw_file_path.exists():
|
||||||
print(f"❌ Raw output file not found: {raw_file_path}")
|
print(f"❌ Raw output file not found: {raw_file_path}")
|
||||||
@ -2,7 +2,11 @@ import json
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from datetime import datetime, time
|
from datetime import datetime, time
|
||||||
|
|
||||||
def cleanup_recent_tracking(tracking_path="data/songlist_tracking.json", cutoff_time_str="11:00"):
|
from karaoke_downloader.data_path_manager import get_data_path_manager
|
||||||
|
|
||||||
|
def cleanup_recent_tracking(tracking_path=None, cutoff_time_str="11:00"):
|
||||||
|
if tracking_path is None:
|
||||||
|
tracking_path = str(get_data_path_manager().get_songlist_tracking_path())
|
||||||
"""Remove entries from songlist_tracking.json that were added after the specified time today."""
|
"""Remove entries from songlist_tracking.json that were added after the specified time today."""
|
||||||
tracking_file = Path(tracking_path)
|
tracking_file = Path(tracking_path)
|
||||||
if not tracking_file.exists():
|
if not tracking_file.exists():
|
||||||
@ -14,8 +14,12 @@ import shutil
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Dict, Any
|
from typing import List, Dict, Any
|
||||||
|
|
||||||
|
from karaoke_downloader.data_path_manager import get_data_path_manager
|
||||||
|
|
||||||
def reset_karaoke_tracking(tracking_file: str = "data/karaoke_tracking.json") -> None:
|
|
||||||
|
def reset_karaoke_tracking(tracking_file: str = None) -> None:
|
||||||
|
if tracking_file is None:
|
||||||
|
tracking_file = str(get_data_path_manager().get_karaoke_tracking_path())
|
||||||
"""Reset the karaoke tracking file to empty state."""
|
"""Reset the karaoke tracking file to empty state."""
|
||||||
print(f"Resetting {tracking_file}...")
|
print(f"Resetting {tracking_file}...")
|
||||||
|
|
||||||
@ -52,7 +56,9 @@ def delete_downloaded_files(downloads_dir: str = "downloads") -> None:
|
|||||||
print(f"❌ Error deleting {downloads_dir}: {e}")
|
print(f"❌ Error deleting {downloads_dir}: {e}")
|
||||||
|
|
||||||
|
|
||||||
def show_download_stats(tracking_file: str = "data/karaoke_tracking.json") -> None:
|
def show_download_stats(tracking_file: str = None) -> None:
|
||||||
|
if tracking_file is None:
|
||||||
|
tracking_file = str(get_data_path_manager().get_karaoke_tracking_path())
|
||||||
"""Show statistics about current downloads."""
|
"""Show statistics about current downloads."""
|
||||||
if not os.path.exists(tracking_file):
|
if not os.path.exists(tracking_file):
|
||||||
print("No tracking file found.")
|
print("No tracking file found.")
|
||||||
@ -1,11 +1,15 @@
|
|||||||
import json
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from karaoke_downloader.data_path_manager import get_data_path_manager
|
||||||
|
|
||||||
def normalize_title(title):
|
def normalize_title(title):
|
||||||
normalized = title.replace("(Karaoke Version)", "").replace("(Karaoke)", "").strip()
|
normalized = title.replace("(Karaoke Version)", "").replace("(Karaoke)", "").strip()
|
||||||
return " ".join(normalized.split()).lower()
|
return " ".join(normalized.split()).lower()
|
||||||
|
|
||||||
def load_songlist(songlist_path="data/songList.json"):
|
def load_songlist(songlist_path=None):
|
||||||
|
if songlist_path is None:
|
||||||
|
songlist_path = str(get_data_path_manager().get_songlist_path())
|
||||||
songlist_file = Path(songlist_path)
|
songlist_file = Path(songlist_path)
|
||||||
if not songlist_file.exists():
|
if not songlist_file.exists():
|
||||||
print(f"⚠️ Songlist file not found: {songlist_path}")
|
print(f"⚠️ Songlist file not found: {songlist_path}")
|
||||||
@ -24,14 +28,18 @@ def load_songlist(songlist_path="data/songList.json"):
|
|||||||
})
|
})
|
||||||
return all_songs
|
return all_songs
|
||||||
|
|
||||||
def load_songlist_tracking(tracking_path="data/songlist_tracking.json"):
|
def load_songlist_tracking(tracking_path=None):
|
||||||
|
if tracking_path is None:
|
||||||
|
tracking_path = str(get_data_path_manager().get_songlist_tracking_path())
|
||||||
tracking_file = Path(tracking_path)
|
tracking_file = Path(tracking_path)
|
||||||
if not tracking_file.exists():
|
if not tracking_file.exists():
|
||||||
return {}
|
return {}
|
||||||
with open(tracking_file, 'r', encoding='utf-8') as f:
|
with open(tracking_file, 'r', encoding='utf-8') as f:
|
||||||
return json.load(f)
|
return json.load(f)
|
||||||
|
|
||||||
def load_server_songs(songs_path="data/songs.json"):
|
def load_server_songs(songs_path=None):
|
||||||
|
if songs_path is None:
|
||||||
|
songs_path = str(get_data_path_manager().get_songs_path())
|
||||||
"""Load the list of songs already available on the server."""
|
"""Load the list of songs already available on the server."""
|
||||||
songs_file = Path(songs_path)
|
songs_file = Path(songs_path)
|
||||||
if not songs_file.exists():
|
if not songs_file.exists():
|
||||||
Loading…
Reference in New Issue
Block a user