#!/usr/bin/env python3
"""
Main CLI application for the Karaoke Song Library Cleanup Tool.
"""

import argparse
import sys
import os
from typing import Dict, List, Any

# Add the cli directory to the path for imports
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from utils import load_json_file, save_json_file
from matching import SongMatcher
from report import ReportGenerator

def _coerce_count(value: Any) -> int:
    """Best-effort conversion of a history 'count' value to an int.

    Handles ints, floats, and numeric strings. Anything unparsable (None,
    arbitrary objects, non-numeric strings) counts as 0 so one malformed
    entry cannot abort the whole merge. (Previously a None/object count
    raised and the broad except aborted the entire merge.)
    """
    if isinstance(value, bool):
        # bool is an int subclass; treat True/False as 1/0 explicitly.
        return int(value)
    if isinstance(value, (int, float)):
        return int(value)
    if isinstance(value, str):
        try:
            return int(value)
        except ValueError:
            return 0
    return 0


def _merge_group(items: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Collapse a group of duplicate history entries into a single record.

    The first entry is used as the base. Counts are summed, boolean flags
    ('favorite', 'disabled') are OR-ed, and for 'key', 'original_path' and
    'genre' the first non-empty value across the group wins.
    """
    merged_item = items[0].copy()

    # Sum the counts (tolerating ints, floats, and numeric strings).
    merged_item['count'] = sum(_coerce_count(item.get('count', 0)) for item in items)

    # For boolean properties, if any are True, keep True.
    merged_item['favorite'] = any(item.get('favorite', False) for item in items)
    merged_item['disabled'] = any(item.get('disabled', False) for item in items)

    # For other properties, keep the first non-empty value.
    for prop in ['key', 'original_path', 'genre']:
        if merged_item.get(prop):
            continue
        for item in items[1:]:  # Skip first item since it is the base
            if item.get(prop):
                merged_item[prop] = item[prop]
                break

    return merged_item


def merge_history_objects(data_dir: str, args) -> None:
    """Merge history objects that match on artist, title, and path, summing their count properties.

    Reads <data_dir>/history.json, groups entries by the case-insensitive
    (artist, title, path) triple, merges each group via ``_merge_group``,
    and writes the result back (unless ``args.dry_run``).

    Entries that cannot be keyed (non-dict items, or items missing
    artist/title/path) are preserved untouched at the end of the output.
    The previous implementation silently dropped them, losing history data.

    Args:
        data_dir: Directory containing history.json.
        args: Parsed CLI namespace; uses ``verbose`` and ``dry_run``.
    """
    history_path = os.path.join(data_dir, 'history.json')

    if not os.path.exists(history_path):
        print(f"History file not found: {history_path}")
        return

    try:
        # Load current history
        history_items = load_json_file(history_path)
        if not history_items:
            print("No history items found to merge")
            return

        print(f"\n🔄 Merging history objects...")
        print(f"Processing {len(history_items):,} history entries...")

        # Group items by normalized (artist, title, path); keep unkeyable
        # entries aside so they survive the rewrite unchanged.
        grouped_items: Dict[tuple, List[Dict[str, Any]]] = {}
        passthrough_items: List[Any] = []

        for item in history_items:
            if not isinstance(item, dict):
                passthrough_items.append(item)
                continue

            # str()/or '' guards against None or non-string field values,
            # which previously crashed on .strip().
            artist = str(item.get('artist') or '').strip()
            title = str(item.get('title') or '').strip()
            path = str(item.get('path') or '').strip()

            if not artist or not title or not path:
                passthrough_items.append(item)
                continue

            key = (artist.lower(), title.lower(), path.lower())
            grouped_items.setdefault(key, []).append(item)

        # Merge groups that contain more than one entry.
        merged_items: List[Dict[str, Any]] = []
        merged_count = 0
        total_merged_entries = 0

        for key, items in grouped_items.items():
            if len(items) == 1:
                # Single item, keep as is
                merged_items.append(items[0])
                continue

            merged_item = _merge_group(items)
            merged_items.append(merged_item)
            merged_count += 1
            total_merged_entries += len(items)

            if args.verbose:
                artist, title, _path = key
                print(f"Merged {len(items)} entries for '{artist} - {title}': total count = {merged_item['count']}")

        # Re-append entries we could not key so no history data is lost.
        merged_items.extend(passthrough_items)

        # Save the merged history
        if not args.dry_run:
            save_json_file(merged_items, history_path)
            print(f"✅ Merged {merged_count} groups ({total_merged_entries} total entries → {len(merged_items)} entries)")
            print(f"📁 Saved to: {history_path}")
        else:
            print(f"DRY RUN: Would merge {merged_count} groups ({total_merged_entries} total entries → {len(merged_items)} entries)")

    except Exception as e:
        # Broad catch: a malformed history file should not crash the CLI.
        print(f"Error merging history objects: {e}")
def process_favorites_and_history(matcher: "SongMatcher", all_songs: List[Dict[str, Any]], data_dir: str, args) -> None:
    """Process favorites and history with priority-based logic to select best versions.

    For each entry in favorites.json / history.json (as requested via
    ``args.process_favorites`` / ``args.process_history``), finds all library
    songs with the same artist/title, asks ``matcher.select_best_song`` for
    the best version, and rewrites the entry's ``path`` to it (saving the
    old path under ``original_path``). Files are written back unless
    ``args.dry_run`` is set.

    Args:
        matcher: SongMatcher providing ``select_best_song``.
        all_songs: Full song library as a list of dicts.
        data_dir: Directory containing favorites.json / history.json.
        args: Parsed CLI namespace; uses ``process_favorites``,
            ``process_history``, ``verbose`` and ``dry_run``.
    """
    # Index the library once by normalized (artist, title). The previous
    # implementation rescanned all_songs for every favorites/history entry,
    # which is O(entries * songs); this is O(entries + songs).
    songs_by_key: Dict[tuple, List[Dict[str, Any]]] = {}
    for song in (all_songs or []):
        song_key = (song.get('artist', '').lower().strip(),
                    song.get('title', '').lower().strip())
        songs_by_key.setdefault(song_key, []).append(song)

    def process_file(file_type: str, file_path: str) -> List[Dict[str, Any]]:
        """Process a single favorites or history file and return its entries."""
        try:
            items = load_json_file(file_path)
            if not items:
                print(f"No {file_type} found in {file_path}")
                return []

            print(f"\nProcessing {len(items):,} {file_type} entries...")

            # Find matching songs for each item
            processed_items = []
            updated_count = 0

            for i, item in enumerate(items):
                if not isinstance(item, dict):
                    print(f"Warning: Skipping invalid {file_type} item at index {i}")
                    continue

                artist = item.get('artist', '')
                title = item.get('title', '')
                current_path = item.get('path', '')

                if not artist or not title:
                    print(f"Warning: Skipping {file_type} item with missing artist/title at index {i}")
                    continue

                # O(1) lookup against the prebuilt index.
                matching_songs = songs_by_key.get(
                    (artist.lower().strip(), title.lower().strip()), [])

                if not matching_songs:
                    print(f"Warning: No matching songs found for {artist} - {title}")
                    processed_items.append(item)
                    continue

                # Use the same priority logic as duplicates
                best_song, skip_songs = matcher.select_best_song(matching_songs, artist, title)

                if best_song and best_song['path'] != current_path:
                    # Update the path to the best version
                    item['path'] = best_song['path']
                    item['original_path'] = current_path  # Keep track of the original
                    updated_count += 1
                    if args.verbose:
                        print(f"Updated {artist} - {title}: {current_path} → {best_song['path']}")

                processed_items.append(item)

            # Save the updated file
            if not args.dry_run:
                save_json_file(processed_items, file_path)
                print(f"✅ Updated {updated_count:,} {file_type} entries with best versions")
                print(f"📁 Saved to: {file_path}")
            else:
                print(f"DRY RUN: Would update {updated_count:,} {file_type} entries")

            return processed_items

        except Exception as e:
            # Keep one bad file from aborting the other file's processing.
            print(f"Error processing {file_type}: {e}")
            return []

    # Process favorites if requested
    if args.process_favorites:
        favorites_path = os.path.join(data_dir, 'favorites.json')
        if os.path.exists(favorites_path):
            process_file('favorites', favorites_path)
        else:
            print(f"Favorites file not found: {favorites_path}")

    # Process history if requested
    if args.process_history:
        history_path = os.path.join(data_dir, 'history.json')
        if os.path.exists(history_path):
            process_file('history', history_path)
        else:
            print(f"History file not found: {history_path}")
def parse_arguments(argv=None):
    """Parse command line arguments.

    Args:
        argv: Optional list of argument strings. Defaults to None, in which
            case argparse reads ``sys.argv[1:]`` (unchanged behavior);
            passing an explicit list makes the parser testable.

    Returns:
        argparse.Namespace with the parsed options.
    """
    parser = argparse.ArgumentParser(
        description="Karaoke Song Library Cleanup Tool",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python main.py                                # Run with default settings (generates reports automatically)
  python main.py --verbose                      # Enable verbose output
  python main.py --config custom_config.json    # Use custom config
  python main.py --output-dir ./reports         # Save reports to custom directory
  python main.py --dry-run                      # Analyze without generating files
  python main.py --process-favorites            # Process favorites with priority logic (MP4 over MP3)
  python main.py --process-history              # Process history with priority logic (MP4 over MP3)
  python main.py --process-all                  # Process everything: duplicates, generate reports, AND update favorites/history with priority logic
  python main.py --process-all --dry-run        # Preview changes without saving
  python main.py --merge-history                # Merge history objects that match on artist, title, and path
  python main.py --merge-history --dry-run      # Preview history merging without saving
        """
    )

    parser.add_argument(
        '--config',
        default='../config/config.json',
        help='Path to configuration file (default: ../config/config.json)'
    )

    parser.add_argument(
        '--input',
        default=None,
        help='Path to input songs file (default: auto-detected from config)'
    )

    parser.add_argument(
        '--output-dir',
        default=None,
        help='Directory for output files (default: auto-detected from config)'
    )

    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Enable verbose output'
    )

    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Analyze songs without generating skip list'
    )

    parser.add_argument(
        '--save-reports',
        action='store_true',
        help='Save detailed reports to files (now always enabled by default)'
    )

    parser.add_argument(
        '--show-config',
        action='store_true',
        help='Show current configuration and exit'
    )

    parser.add_argument(
        '--process-favorites',
        action='store_true',
        help='Process favorites with priority-based logic to select best versions (MP4 over MP3)'
    )

    parser.add_argument(
        '--process-history',
        action='store_true',
        help='Process history with priority-based logic to select best versions (MP4 over MP3)'
    )

    parser.add_argument(
        '--process-all',
        action='store_true',
        help='Process everything: duplicates, generate reports, AND update favorites/history with priority logic'
    )

    parser.add_argument(
        '--merge-history',
        action='store_true',
        help='Merge history objects that match on artist, title, and path, summing their count properties'
    )

    return parser.parse_args(argv)
def load_config(config_path: str) -> Dict[str, Any]:
    """Read and return the configuration file, exiting the process on failure.

    Args:
        config_path: Path to the configuration JSON file.

    Returns:
        The parsed configuration dictionary.
    """
    try:
        cfg = load_json_file(config_path)
        print(f"Configuration loaded from: {config_path}")
        return cfg
    except Exception as exc:
        # Fatal: the tool cannot run without its configuration.
        print(f"Error loading configuration: {exc}")
        sys.exit(1)
def load_songs(input_path: str) -> List[Dict[str, Any]]:
    """Load the song library from *input_path*, exiting the process on failure.

    The file must contain a JSON array; any other top-level type is treated
    as a fatal error.

    Args:
        input_path: Path to the songs JSON file.

    Returns:
        The list of song dictionaries.
    """
    try:
        print(f"Loading songs from: {input_path}")
        song_list = load_json_file(input_path)

        if isinstance(song_list, list):
            print(f"Loaded {len(song_list):,} songs")
            return song_list

        raise ValueError("Input file must contain a JSON array")
    except Exception as exc:
        # Fatal: nothing useful can be done without the song library.
        print(f"Error loading songs: {exc}")
        sys.exit(1)
def main():
    """Main application entry point.

    Orchestrates the CLI: loads configuration and (when needed) the song
    library, then runs one of three modes depending on the parsed flags —
    favorites/history reprocessing, history merging, or the full duplicate
    analysis with skip-list and report generation.
    """
    args = parse_arguments()

    # Load configuration
    config = load_config(args.config)

    # Override config with command line arguments
    if args.verbose:
        config['output']['verbose'] = True

    # Show configuration if requested (prints and exits early)
    if args.show_config:
        reporter = ReportGenerator(config)
        reporter.print_report("config", config)
        return

    # Determine data directory and input file from config or args
    data_dir = args.output_dir or config.get('data_directory', '../data')
    # Resolve relative paths from CLI directory
    if not os.path.isabs(data_dir):
        data_dir = os.path.join(os.path.dirname(__file__), '..', data_dir)
    input_file = args.input or os.path.join(data_dir, 'songs.json')

    # Load songs (only if needed for processing) — the --merge-history mode
    # works directly on history.json and never touches the song library,
    # so songs/matcher/reporter stay None in that case.
    songs = None
    matcher = None
    reporter = None

    if not args.merge_history:
        songs = load_songs(input_file)
        matcher = SongMatcher(config, data_dir)
        reporter = ReportGenerator(config)

    # Process favorites and history if requested
    if args.process_favorites or args.process_history or args.process_all:
        print("\n🎯 Processing favorites and history with priority logic...")
        print("=" * 60)

        # If --process-all is used, set both flags
        if args.process_all:
            args.process_favorites = True
            args.process_history = True

        process_favorites_and_history(matcher, songs, data_dir, args)
        print("\n" + "=" * 60)
        print("Favorites/History processing complete!")

        # If --process-all, also do the full duplicate analysis and reporting
        if args.process_all:
            print("\n🔄 Processing duplicates and generating reports...")
            print("=" * 60)
        else:
            # Favorites/history-only run: nothing further to do.
            return

    # Merge history objects if requested (separate operation)
    if args.merge_history:
        print("\n🔄 Merging history objects...")
        print("=" * 60)
        merge_history_objects(data_dir, args)
        print("\n" + "=" * 60)
        print("History merging complete!")
        return

    # If not processing favorites/history OR if --process-all, do the full analysis
    if not (args.process_favorites or args.process_history) or args.process_all:
        print("\nStarting song analysis...")
        print("=" * 60)

        # Process songs
        try:
            best_songs, skip_songs, stats = matcher.process_songs(songs)

            # Generate reports
            print("\n" + "=" * 60)
            reporter.print_report("summary", stats)

            # Add channel priority report
            if config.get('channel_priorities'):
                channel_report = reporter.generate_channel_priority_report(stats, config['channel_priorities'])
                print("\n" + channel_report)

            if config['output']['verbose']:
                duplicate_info = matcher.get_detailed_duplicate_info(songs)
                reporter.print_report("duplicates", duplicate_info)

            reporter.print_report("skip_summary", skip_songs)

            # Save skip list if not dry run
            if not args.dry_run and skip_songs:
                skip_list_path = os.path.join(data_dir, 'skipSongs.json')

                # Create simplified skip list (just paths and reasons) with
                # deduplication — only the first occurrence of each path is kept.
                seen_paths = set()
                simple_skip_list = []
                duplicate_count = 0

                for skip_song in skip_songs:
                    path = skip_song['path']
                    if path not in seen_paths:
                        seen_paths.add(path)
                        skip_entry = {'path': path}
                        if config['output']['include_reasons']:
                            skip_entry['reason'] = skip_song['reason']
                        simple_skip_list.append(skip_entry)
                    else:
                        duplicate_count += 1

                save_json_file(simple_skip_list, skip_list_path)
                print(f"\nSkip list saved to: {skip_list_path}")
                print(f"Total songs to skip: {len(simple_skip_list):,}")
                if duplicate_count > 0:
                    print(f"Removed {duplicate_count:,} duplicate entries from skip list")
            elif args.dry_run:
                print("\nDRY RUN MODE: No skip list generated")

            # Always generate detailed reports (not just when --save-reports is used)
            if not args.dry_run:
                reports_dir = os.path.join(data_dir, 'reports')
                os.makedirs(reports_dir, exist_ok=True)

                print(f"\n📊 Generating enhanced analysis reports...")

                # Analyze skip patterns
                skip_analysis = reporter.analyze_skip_patterns(skip_songs)

                # Analyze channel optimization
                channel_analysis = reporter.analyze_channel_optimization(stats, skip_analysis)

                # Generate and save enhanced reports
                enhanced_summary = reporter.generate_enhanced_summary_report(stats, skip_analysis)
                reporter.save_report_to_file(enhanced_summary, os.path.join(reports_dir, 'enhanced_summary_report.txt'))

                channel_optimization = reporter.generate_channel_optimization_report(channel_analysis)
                reporter.save_report_to_file(channel_optimization, os.path.join(reports_dir, 'channel_optimization_report.txt'))

                duplicate_patterns = reporter.generate_duplicate_pattern_report(skip_analysis)
                reporter.save_report_to_file(duplicate_patterns, os.path.join(reports_dir, 'duplicate_pattern_report.txt'))

                actionable_insights = reporter.generate_actionable_insights_report(stats, skip_analysis, channel_analysis)
                reporter.save_report_to_file(actionable_insights, os.path.join(reports_dir, 'actionable_insights_report.txt'))

                # Generate detailed duplicate analysis
                detailed_duplicates = reporter.generate_detailed_duplicate_analysis(skip_songs, best_songs)
                reporter.save_report_to_file(detailed_duplicates, os.path.join(reports_dir, 'detailed_duplicate_analysis.txt'))

                # Save original reports for compatibility
                summary_report = reporter.generate_summary_report(stats)
                reporter.save_report_to_file(summary_report, os.path.join(reports_dir, 'summary_report.txt'))

                skip_report = reporter.generate_skip_list_summary(skip_songs)
                reporter.save_report_to_file(skip_report, os.path.join(reports_dir, 'skip_list_summary.txt'))

                # Save detailed duplicate report if verbose
                if config['output']['verbose']:
                    duplicate_info = matcher.get_detailed_duplicate_info(songs)
                    duplicate_report = reporter.generate_duplicate_details(duplicate_info)
                    reporter.save_report_to_file(duplicate_report, os.path.join(reports_dir, 'duplicate_details.txt'))

                # Save analysis data as JSON for further processing
                analysis_data = {
                    'stats': stats,
                    'skip_analysis': skip_analysis,
                    'channel_analysis': channel_analysis,
                    'timestamp': __import__('datetime').datetime.now().isoformat()
                }
                save_json_file(analysis_data, os.path.join(reports_dir, 'analysis_data.json'))

                # Save full skip list data (this is what the web UI needs)
                save_json_file(skip_songs, os.path.join(reports_dir, 'skip_songs_detailed.json'))

                print(f"✅ Enhanced reports saved to: {reports_dir}")
                print(f"📋 Generated reports:")
                print(f"   • enhanced_summary_report.txt - Comprehensive analysis")
                print(f"   • channel_optimization_report.txt - Priority optimization suggestions")
                print(f"   • duplicate_pattern_report.txt - Duplicate pattern analysis")
                print(f"   • actionable_insights_report.txt - Recommendations and insights")
                print(f"   • detailed_duplicate_analysis.txt - Specific songs and their duplicates")
                print(f"   • analysis_data.json - Raw analysis data for further processing")
                print(f"   • skip_songs_detailed.json - Web UI data (always generated)")
            elif args.dry_run:
                print("\nDRY RUN MODE: No reports generated")

            print("\n" + "=" * 60)
            print("Analysis complete!")

        except Exception as e:
            # Any failure in the analysis pipeline is fatal for the CLI run.
            print(f"\nError during processing: {e}")
            sys.exit(1)
# Run the CLI only when executed as a script (not on import).
if __name__ == "__main__":
    main()