#!/usr/bin/env python3
"""
Main CLI application for the Karaoke Song Library Cleanup Tool.
"""

import argparse
import os
import sys
from datetime import datetime
from typing import Dict, List, Any

# Add the cli directory to the path for imports
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from utils import load_json_file, save_json_file
from matching import SongMatcher
from report import ReportGenerator


def merge_history_objects(data_dir: str, args) -> None:
    """Merge history objects that match on artist, title, and path, summing their count properties.

    Entries are grouped case-insensitively on (artist, title, path). Each
    multi-entry group collapses into a single record whose ``count`` is the
    sum of the group's counts, whose ``favorite``/``disabled`` flags are
    OR-ed across the group, and whose remaining properties keep the first
    non-empty value found.

    Args:
        data_dir: Directory containing ``history.json``.
        args: Parsed CLI namespace; ``verbose`` and ``dry_run`` are honored.
    """
    history_path = os.path.join(data_dir, 'history.json')
    if not os.path.exists(history_path):
        print(f"History file not found: {history_path}")
        return

    try:
        # Load current history
        history_items = load_json_file(history_path)
        if not history_items:
            print("No history items found to merge")
            return

        print(f"\nšŸ”„ Merging history objects...")
        print(f"Processing {len(history_items):,} history entries...")

        # Group items by a case-insensitive (artist, title, path) key.
        grouped_items: Dict[tuple, List[Dict[str, Any]]] = {}
        merged_count = 0
        total_merged_entries = 0

        for item in history_items:
            if not isinstance(item, dict):
                continue
            artist = item.get('artist', '').strip()
            title = item.get('title', '').strip()
            path = item.get('path', '').strip()
            if not artist or not title or not path:
                continue
            key = (artist.lower(), title.lower(), path.lower())
            grouped_items.setdefault(key, []).append(item)

        # Process groups; only groups with multiple items need merging.
        merged_items = []
        for key, items in grouped_items.items():
            if len(items) == 1:
                # Single item, keep as is.
                merged_items.append(items[0])
                continue

            artist, title, path = key
            # Start with the first item as the base.
            merged_item = items[0].copy()

            # Sum the counts; tolerate counts stored as strings.
            total_count = 0
            for item in items:
                count_value = item.get('count', 0)
                if isinstance(count_value, str):
                    try:
                        total_count += int(count_value)
                    except ValueError:
                        pass  # Non-numeric string counts contribute nothing.
                else:
                    total_count += count_value
            merged_item['count'] = total_count

            # For boolean properties, if any entry set True, keep True.
            merged_item['favorite'] = any(item.get('favorite', False) for item in items)
            merged_item['disabled'] = any(item.get('disabled', False) for item in items)

            # For other properties, keep the first non-empty value.
            for prop in ['key', 'original_path', 'genre']:
                if prop in merged_item and merged_item[prop]:
                    continue
                for item in items[1:]:  # Skip first item since we already have it
                    if item.get(prop):
                        merged_item[prop] = item[prop]
                        break

            merged_items.append(merged_item)
            merged_count += 1
            total_merged_entries += len(items)
            if args.verbose:
                print(f"Merged {len(items)} entries for '{artist} - {title}': total count = {total_count}")

        # Save the merged history (unless dry run).
        if not args.dry_run:
            save_json_file(merged_items, history_path)
            print(f"āœ… Merged {merged_count} groups ({total_merged_entries} total entries → {len(merged_items)} entries)")
            print(f"šŸ“ Saved to: {history_path}")
        else:
            print(f"DRY RUN: Would merge {merged_count} groups ({total_merged_entries} total entries → {len(merged_items)} entries)")
    except Exception as e:
        print(f"Error merging history objects: {e}")


def process_favorites_and_history(matcher: SongMatcher, all_songs: List[Dict[str, Any]],
                                  data_dir: str, args) -> None:
    """Process favorites and history with priority-based logic to select best versions.

    For every entry in favorites.json / history.json (as selected by the CLI
    flags), finds all songs in the library matching the entry's artist/title
    and repoints the entry's ``path`` at the version ``matcher`` ranks best,
    preserving the old value in ``original_path``.

    Args:
        matcher: Configured SongMatcher used to rank candidate versions.
        all_songs: Full song library loaded from the input file.
        data_dir: Directory containing favorites.json / history.json.
        args: Parsed CLI namespace; ``process_favorites``, ``process_history``,
            ``verbose`` and ``dry_run`` are honored.
    """

    def process_file(file_type: str, file_path: str) -> List[Dict[str, Any]]:
        """Process a single favorites or history file."""
        try:
            items = load_json_file(file_path)
            if not items:
                print(f"No {file_type} found in {file_path}")
                return []

            print(f"\nProcessing {len(items):,} {file_type} entries...")

            processed_items = []
            updated_count = 0

            for i, item in enumerate(items):
                if not isinstance(item, dict):
                    print(f"Warning: Skipping invalid {file_type} item at index {i}")
                    continue

                artist = item.get('artist', '')
                title = item.get('title', '')
                current_path = item.get('path', '')

                if not artist or not title:
                    print(f"Warning: Skipping {file_type} item with missing artist/title at index {i}")
                    continue

                # Find all library songs matching this artist/title
                # (case-insensitive, whitespace-trimmed).
                matching_songs = []
                for song in all_songs:
                    if (song.get('artist', '').lower().strip() == artist.lower().strip() and
                            song.get('title', '').lower().strip() == title.lower().strip()):
                        matching_songs.append(song)

                if not matching_songs:
                    print(f"Warning: No matching songs found for {artist} - {title}")
                    processed_items.append(item)
                    continue

                # Use the same priority logic as duplicate resolution.
                best_song, _ = matcher.select_best_song(matching_songs, artist, title)

                if best_song and best_song['path'] != current_path:
                    # Update the path to the best version, keeping the original.
                    item['path'] = best_song['path']
                    item['original_path'] = current_path
                    updated_count += 1
                    if args.verbose:
                        print(f"Updated {artist} - {title}: {current_path} → {best_song['path']}")

                processed_items.append(item)

            # Save the updated file (unless dry run).
            if not args.dry_run:
                save_json_file(processed_items, file_path)
                print(f"āœ… Updated {updated_count:,} {file_type} entries with best versions")
                print(f"šŸ“ Saved to: {file_path}")
            else:
                print(f"DRY RUN: Would update {updated_count:,} {file_type} entries")

            return processed_items
        except Exception as e:
            print(f"Error processing {file_type}: {e}")
            return []

    if args.process_favorites:
        favorites_path = os.path.join(data_dir, 'favorites.json')
        if os.path.exists(favorites_path):
            process_file('favorites', favorites_path)
        else:
            print(f"Favorites file not found: {favorites_path}")

    if args.process_history:
        history_path = os.path.join(data_dir, 'history.json')
        if os.path.exists(history_path):
            process_file('history', history_path)
        else:
            print(f"History file not found: {history_path}")


def parse_arguments():
    """Parse command line arguments.

    Returns:
        argparse.Namespace with all CLI options.
    """
    parser = argparse.ArgumentParser(
        description="Karaoke Song Library Cleanup Tool",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python main.py                              # Run with default settings (generates reports automatically)
  python main.py --verbose                    # Enable verbose output
  python main.py --config custom_config.json  # Use custom config
  python main.py --output-dir ./reports       # Save reports to custom directory
  python main.py --dry-run                    # Analyze without generating files
  python main.py --process-favorites          # Process favorites with priority logic (MP4 over MP3)
  python main.py --process-history            # Process history with priority logic (MP4 over MP3)
  python main.py --process-all                # Process everything: duplicates, generate reports, AND update favorites/history with priority logic
  python main.py --process-all --dry-run      # Preview changes without saving
  python main.py --merge-history              # Merge history objects that match on artist, title, and path
  python main.py --merge-history --dry-run    # Preview history merging without saving
        """
    )
    parser.add_argument(
        '--config',
        default='../config/config.json',
        help='Path to configuration file (default: ../config/config.json)'
    )
    parser.add_argument(
        '--input',
        default=None,
        help='Path to input songs file (default: auto-detected from config)'
    )
    parser.add_argument(
        '--output-dir',
        default=None,
        help='Directory for output files (default: auto-detected from config)'
    )
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Enable verbose output'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Analyze songs without generating skip list'
    )
    parser.add_argument(
        '--save-reports',
        action='store_true',
        help='Save detailed reports to files (now always enabled by default)'
    )
    parser.add_argument(
        '--show-config',
        action='store_true',
        help='Show current configuration and exit'
    )
    parser.add_argument(
        '--process-favorites',
        action='store_true',
        help='Process favorites with priority-based logic to select best versions (MP4 over MP3)'
    )
    parser.add_argument(
        '--process-history',
        action='store_true',
        help='Process history with priority-based logic to select best versions (MP4 over MP3)'
    )
    parser.add_argument(
        '--process-all',
        action='store_true',
        help='Process everything: duplicates, generate reports, AND update favorites/history with priority logic'
    )
    parser.add_argument(
        '--merge-history',
        action='store_true',
        help='Merge history objects that match on artist, title, and path, summing their count properties'
    )
    return parser.parse_args()


def load_config(config_path: str) -> Dict[str, Any]:
    """Load and validate configuration.

    Exits the process with status 1 if the file cannot be loaded.
    """
    try:
        config = load_json_file(config_path)
        print(f"Configuration loaded from: {config_path}")
        return config
    except Exception as e:
        print(f"Error loading configuration: {e}")
        sys.exit(1)


def load_songs(input_path: str) -> List[Dict[str, Any]]:
    """Load songs from input file.

    Exits the process with status 1 if the file cannot be loaded or does not
    contain a JSON array.
    """
    try:
        print(f"Loading songs from: {input_path}")
        songs = load_json_file(input_path)
        if not isinstance(songs, list):
            raise ValueError("Input file must contain a JSON array")
        print(f"Loaded {len(songs):,} songs")
        return songs
    except Exception as e:
        print(f"Error loading songs: {e}")
        sys.exit(1)


def _save_skip_list(skip_songs: List[Dict[str, Any]], data_dir: str,
                    config: Dict[str, Any]) -> None:
    """Write a simplified, path-deduplicated skip list to skipSongs.json."""
    skip_list_path = os.path.join(data_dir, 'skipSongs.json')

    # Simplified skip list (just paths and reasons) with deduplication.
    seen_paths = set()
    simple_skip_list = []
    duplicate_count = 0
    for skip_song in skip_songs:
        path = skip_song['path']
        if path not in seen_paths:
            seen_paths.add(path)
            skip_entry = {'path': path}
            if config['output']['include_reasons']:
                skip_entry['reason'] = skip_song['reason']
            simple_skip_list.append(skip_entry)
        else:
            duplicate_count += 1

    save_json_file(simple_skip_list, skip_list_path)
    print(f"\nSkip list saved to: {skip_list_path}")
    print(f"Total songs to skip: {len(simple_skip_list):,}")
    if duplicate_count > 0:
        print(f"Removed {duplicate_count:,} duplicate entries from skip list")


def _generate_reports(reporter: ReportGenerator, matcher: SongMatcher,
                      config: Dict[str, Any], data_dir: str,
                      songs: List[Dict[str, Any]], stats: Dict[str, Any],
                      skip_songs: List[Dict[str, Any]],
                      best_songs: List[Dict[str, Any]]) -> None:
    """Generate and save all enhanced analysis reports under <data_dir>/reports."""
    reports_dir = os.path.join(data_dir, 'reports')
    os.makedirs(reports_dir, exist_ok=True)

    print(f"\nšŸ“Š Generating enhanced analysis reports...")

    # Analyze skip patterns and channel optimization.
    skip_analysis = reporter.analyze_skip_patterns(skip_songs)
    channel_analysis = reporter.analyze_channel_optimization(stats, skip_analysis)

    # Generate and save enhanced reports.
    enhanced_summary = reporter.generate_enhanced_summary_report(stats, skip_analysis)
    reporter.save_report_to_file(enhanced_summary,
                                 os.path.join(reports_dir, 'enhanced_summary_report.txt'))

    channel_optimization = reporter.generate_channel_optimization_report(channel_analysis)
    reporter.save_report_to_file(channel_optimization,
                                 os.path.join(reports_dir, 'channel_optimization_report.txt'))

    duplicate_patterns = reporter.generate_duplicate_pattern_report(skip_analysis)
    reporter.save_report_to_file(duplicate_patterns,
                                 os.path.join(reports_dir, 'duplicate_pattern_report.txt'))

    actionable_insights = reporter.generate_actionable_insights_report(
        stats, skip_analysis, channel_analysis)
    reporter.save_report_to_file(actionable_insights,
                                 os.path.join(reports_dir, 'actionable_insights_report.txt'))

    # Detailed duplicate analysis (specific songs and their duplicates).
    detailed_duplicates = reporter.generate_detailed_duplicate_analysis(skip_songs, best_songs)
    reporter.save_report_to_file(detailed_duplicates,
                                 os.path.join(reports_dir, 'detailed_duplicate_analysis.txt'))

    # Original reports kept for compatibility.
    summary_report = reporter.generate_summary_report(stats)
    reporter.save_report_to_file(summary_report,
                                 os.path.join(reports_dir, 'summary_report.txt'))

    skip_report = reporter.generate_skip_list_summary(skip_songs)
    reporter.save_report_to_file(skip_report,
                                 os.path.join(reports_dir, 'skip_list_summary.txt'))

    # Detailed duplicate report only in verbose mode.
    if config['output']['verbose']:
        duplicate_info = matcher.get_detailed_duplicate_info(songs)
        duplicate_report = reporter.generate_duplicate_details(duplicate_info)
        reporter.save_report_to_file(duplicate_report,
                                     os.path.join(reports_dir, 'duplicate_details.txt'))

    # Raw analysis data as JSON for further processing.
    analysis_data = {
        'stats': stats,
        'skip_analysis': skip_analysis,
        'channel_analysis': channel_analysis,
        'timestamp': datetime.now().isoformat()
    }
    save_json_file(analysis_data, os.path.join(reports_dir, 'analysis_data.json'))

    # Full skip list data (this is what the web UI needs).
    save_json_file(skip_songs, os.path.join(reports_dir, 'skip_songs_detailed.json'))

    print(f"āœ… Enhanced reports saved to: {reports_dir}")
    print(f"šŸ“‹ Generated reports:")
    print(f"   • enhanced_summary_report.txt - Comprehensive analysis")
    print(f"   • channel_optimization_report.txt - Priority optimization suggestions")
    print(f"   • duplicate_pattern_report.txt - Duplicate pattern analysis")
    print(f"   • actionable_insights_report.txt - Recommendations and insights")
    print(f"   • detailed_duplicate_analysis.txt - Specific songs and their duplicates")
    print(f"   • analysis_data.json - Raw analysis data for further processing")
    print(f"   • skip_songs_detailed.json - Web UI data (always generated)")


def main():
    """Main application entry point."""
    args = parse_arguments()

    # Load configuration
    config = load_config(args.config)

    # Override config with command line arguments. setdefault guards against
    # configs that omit the 'output' section entirely.
    if args.verbose:
        config.setdefault('output', {})['verbose'] = True

    # Show configuration if requested
    if args.show_config:
        reporter = ReportGenerator(config)
        reporter.print_report("config", config)
        return

    # Determine data directory and input file from config or args
    data_dir = args.output_dir or config.get('data_directory', '../data')
    # Resolve relative paths from CLI directory
    if not os.path.isabs(data_dir):
        data_dir = os.path.join(os.path.dirname(__file__), '..', data_dir)
    input_file = args.input or os.path.join(data_dir, 'songs.json')

    # Load songs only when some operation actually needs them. NOTE: the
    # favorites/history flags need the song library even when --merge-history
    # is also given, so the check is per-operation rather than merge-only.
    songs = None
    matcher = None
    reporter = None
    needs_songs = (not args.merge_history or args.process_favorites
                   or args.process_history or args.process_all)
    if needs_songs:
        songs = load_songs(input_file)
        matcher = SongMatcher(config, data_dir)
        reporter = ReportGenerator(config)

    # Process favorites and history if requested
    if args.process_favorites or args.process_history or args.process_all:
        print("\nšŸŽÆ Processing favorites and history with priority logic...")
        print("=" * 60)

        # If --process-all is used, set both flags
        if args.process_all:
            args.process_favorites = True
            args.process_history = True

        process_favorites_and_history(matcher, songs, data_dir, args)

        print("\n" + "=" * 60)
        print("Favorites/History processing complete!")

        # If --process-all, also do the full duplicate analysis and reporting
        if args.process_all:
            print("\nšŸ”„ Processing duplicates and generating reports...")
            print("=" * 60)
        else:
            return

    # Merge history objects if requested (separate operation)
    if args.merge_history:
        print("\nšŸ”„ Merging history objects...")
        print("=" * 60)
        merge_history_objects(data_dir, args)
        print("\n" + "=" * 60)
        print("History merging complete!")
        return

    # If not processing favorites/history OR if --process-all, do the full analysis
    if not (args.process_favorites or args.process_history) or args.process_all:
        print("\nStarting song analysis...")
        print("=" * 60)

        try:
            best_songs, skip_songs, stats = matcher.process_songs(songs)

            # Generate reports
            print("\n" + "=" * 60)
            reporter.print_report("summary", stats)

            # Add channel priority report
            if config.get('channel_priorities'):
                channel_report = reporter.generate_channel_priority_report(
                    stats, config['channel_priorities'])
                print("\n" + channel_report)

            if config['output']['verbose']:
                duplicate_info = matcher.get_detailed_duplicate_info(songs)
                reporter.print_report("duplicates", duplicate_info)

            reporter.print_report("skip_summary", skip_songs)

            # Save skip list if not dry run
            if not args.dry_run and skip_songs:
                _save_skip_list(skip_songs, data_dir, config)
            elif args.dry_run:
                print("\nDRY RUN MODE: No skip list generated")

            # Always generate detailed reports (not just when --save-reports is used)
            if not args.dry_run:
                _generate_reports(reporter, matcher, config, data_dir,
                                  songs, stats, skip_songs, best_songs)
            elif args.dry_run:
                print("\nDRY RUN MODE: No reports generated")

            print("\n" + "=" * 60)
            print("Analysis complete!")
        except Exception as e:
            print(f"\nError during processing: {e}")
            sys.exit(1)


if __name__ == "__main__":
    main()