# KaraokeMerge/cli/main.py (513 lines, 21 KiB, Python)
#!/usr/bin/env python3
"""
Main CLI application for the Karaoke Song Library Cleanup Tool.
"""
import argparse
import sys
import os
from typing import Dict, List, Any
# Add the cli directory to the path for imports
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from utils import load_json_file, save_json_file
from matching import SongMatcher
from report import ReportGenerator
def merge_history_objects(data_dir: str, args) -> None:
    """Collapse duplicate history entries into single records.

    History items that match case-insensitively on (artist, title, path)
    are merged: their ``count`` values are summed, the ``favorite`` and
    ``disabled`` flags are OR-ed, and for 'key', 'original_path', and
    'genre' the first non-empty value wins.  The merged list overwrites
    ``history.json`` unless ``args.dry_run`` is set.

    Args:
        data_dir: Directory containing ``history.json``.
        args: Parsed CLI namespace; ``dry_run`` and ``verbose`` are read.
    """
    history_path = os.path.join(data_dir, 'history.json')
    if not os.path.exists(history_path):
        print(f"History file not found: {history_path}")
        return
    try:
        # Load current history
        history_items = load_json_file(history_path)
        if not history_items:
            print("No history items found to merge")
            return
        print("\n🔄 Merging history objects...")
        print(f"Processing {len(history_items):,} history entries...")
        # Group items by normalized (artist, title, path).  Entries missing
        # any of the three fields cannot be keyed and are dropped, matching
        # the tool's existing behavior.
        grouped_items = {}
        merged_count = 0
        total_merged_entries = 0
        for item in history_items:
            if not isinstance(item, dict):
                continue
            artist = item.get('artist', '').strip()
            title = item.get('title', '').strip()
            path = item.get('path', '').strip()
            if not artist or not title or not path:
                continue
            key = (artist.lower(), title.lower(), path.lower())
            grouped_items.setdefault(key, []).append(item)
        merged_items = []
        for key, items in grouped_items.items():
            if len(items) == 1:
                # Unique entry: keep as-is.
                merged_items.append(items[0])
                continue
            artist, title, path = key
            # Use the first occurrence as the base record.
            merged_item = items[0].copy()
            # Sum the play counts.  Coerce through int() so that string
            # counts still work and junk values (None, non-numeric) simply
            # contribute nothing instead of raising TypeError, which the
            # previous implementation did for any non-int, non-str value.
            total_count = 0
            for entry in items:
                try:
                    total_count += int(entry.get('count', 0))
                except (TypeError, ValueError):
                    pass  # non-numeric count contributes 0
            merged_item['count'] = total_count
            # Boolean flags: True if any duplicate had them set.
            merged_item['favorite'] = any(entry.get('favorite', False) for entry in items)
            merged_item['disabled'] = any(entry.get('disabled', False) for entry in items)
            # Other properties: keep the first non-empty value found.
            for prop in ['key', 'original_path', 'genre']:
                if merged_item.get(prop):
                    continue
                for entry in items[1:]:  # base item already checked above
                    if entry.get(prop):
                        merged_item[prop] = entry[prop]
                        break
            merged_items.append(merged_item)
            merged_count += 1
            total_merged_entries += len(items)
            if args.verbose:
                print(f"Merged {len(items)} entries for '{artist} - {title}': total count = {total_count}")
        # Persist the merged history (unless dry-running).
        if not args.dry_run:
            save_json_file(merged_items, history_path)
            print(f"✅ Merged {merged_count} groups ({total_merged_entries} total entries → {len(merged_items)} entries)")
            print(f"📁 Saved to: {history_path}")
        else:
            print(f"DRY RUN: Would merge {merged_count} groups ({total_merged_entries} total entries → {len(merged_items)} entries)")
    except Exception as e:
        print(f"Error merging history objects: {e}")
def process_favorites_and_history(matcher: SongMatcher, all_songs: List[Dict[str, Any]], data_dir: str, args) -> None:
    """Rewrite favorites/history paths to point at the best library version.

    For each entry in ``favorites.json`` / ``history.json`` (selected via
    ``args.process_favorites`` / ``args.process_history``), every library
    song matching the entry's artist/title is collected and
    ``matcher.select_best_song`` picks the preferred one; the entry's
    ``path`` is updated to it, with the old path preserved in
    ``original_path``.  Files are saved in place unless ``args.dry_run``.

    Args:
        matcher: Provides the priority-based best-version selection.
        all_songs: Full song library to search for matches.
        data_dir: Directory containing favorites.json / history.json.
        args: Parsed CLI namespace (dry_run, verbose, process_* flags).
    """
    # Index the library once by normalized (artist, title).  The previous
    # implementation rescanned all_songs for every favorites/history entry,
    # which was O(entries * songs); this lookup table makes it O(1) per entry.
    songs_by_key: Dict[Any, List[Dict[str, Any]]] = {}
    for song in all_songs:
        song_key = (song.get('artist', '').lower().strip(),
                    song.get('title', '').lower().strip())
        songs_by_key.setdefault(song_key, []).append(song)

    def process_file(file_type: str, file_path: str) -> List[Dict[str, Any]]:
        """Process a single favorites or history JSON file in place."""
        try:
            items = load_json_file(file_path)
            if not items:
                print(f"No {file_type} found in {file_path}")
                return []
            print(f"\nProcessing {len(items):,} {file_type} entries...")
            processed_items = []
            updated_count = 0
            for i, item in enumerate(items):
                if not isinstance(item, dict):
                    print(f"Warning: Skipping invalid {file_type} item at index {i}")
                    continue
                artist = item.get('artist', '')
                title = item.get('title', '')
                current_path = item.get('path', '')
                if not artist or not title:
                    print(f"Warning: Skipping {file_type} item with missing artist/title at index {i}")
                    continue
                # O(1) lookup of all library songs for this artist/title.
                matching_songs = songs_by_key.get(
                    (artist.lower().strip(), title.lower().strip()), [])
                if not matching_songs:
                    print(f"Warning: No matching songs found for {artist} - {title}")
                    processed_items.append(item)
                    continue
                # Same priority logic as duplicate resolution.
                best_song, skip_songs = matcher.select_best_song(matching_songs, artist, title)
                if best_song and best_song['path'] != current_path:
                    # Point the entry at the best version; remember the old path.
                    item['path'] = best_song['path']
                    item['original_path'] = current_path
                    updated_count += 1
                    if args.verbose:
                        print(f"Updated {artist} - {title}: {current_path} → {best_song['path']}")
                processed_items.append(item)
            # Save the updated file
            if not args.dry_run:
                save_json_file(processed_items, file_path)
                print(f"✅ Updated {updated_count:,} {file_type} entries with best versions")
                print(f"📁 Saved to: {file_path}")
            else:
                print(f"DRY RUN: Would update {updated_count:,} {file_type} entries")
            return processed_items
        except Exception as e:
            print(f"Error processing {file_type}: {e}")
            return []

    # Process favorites if requested
    if args.process_favorites:
        favorites_path = os.path.join(data_dir, 'favorites.json')
        if os.path.exists(favorites_path):
            process_file('favorites', favorites_path)
        else:
            print(f"Favorites file not found: {favorites_path}")
    # Process history if requested
    if args.process_history:
        history_path = os.path.join(data_dir, 'history.json')
        if os.path.exists(history_path):
            process_file('history', history_path)
        else:
            print(f"History file not found: {history_path}")
def parse_arguments() -> argparse.Namespace:
    """Parse command line arguments for the cleanup tool.

    Returns:
        The populated ``argparse.Namespace`` describing this run.
    """
    parser = argparse.ArgumentParser(
        description="Karaoke Song Library Cleanup Tool",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python main.py                             # Run with default settings (generates reports automatically)
  python main.py --verbose                   # Enable verbose output
  python main.py --config custom_config.json # Use custom config
  python main.py --output-dir ./reports      # Save reports to custom directory
  python main.py --dry-run                   # Analyze without generating files
  python main.py --process-favorites         # Process favorites with priority logic (MP4 over MP3)
  python main.py --process-history           # Process history with priority logic (MP4 over MP3)
  python main.py --process-all               # Process everything: duplicates, generate reports, AND update favorites/history with priority logic
  python main.py --process-all --dry-run     # Preview changes without saving
  python main.py --merge-history             # Merge history objects that match on artist, title, and path
  python main.py --merge-history --dry-run   # Preview history merging without saving
"""
    )
    parser.add_argument(
        '--config',
        default='../config/config.json',
        help='Path to configuration file (default: ../config/config.json)'
    )
    parser.add_argument(
        '--input',
        default=None,
        help='Path to input songs file (default: auto-detected from config)'
    )
    parser.add_argument(
        '--output-dir',
        default=None,
        help='Directory for output files (default: auto-detected from config)'
    )
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Enable verbose output'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Analyze songs without generating skip list'
    )
    parser.add_argument(
        '--save-reports',
        action='store_true',
        help='Save detailed reports to files (now always enabled by default)'
    )
    parser.add_argument(
        '--show-config',
        action='store_true',
        help='Show current configuration and exit'
    )
    parser.add_argument(
        '--process-favorites',
        action='store_true',
        help='Process favorites with priority-based logic to select best versions (MP4 over MP3)'
    )
    parser.add_argument(
        '--process-history',
        action='store_true',
        help='Process history with priority-based logic to select best versions (MP4 over MP3)'
    )
    parser.add_argument(
        '--process-all',
        action='store_true',
        help='Process everything: duplicates, generate reports, AND update favorites/history with priority logic'
    )
    parser.add_argument(
        '--merge-history',
        action='store_true',
        help='Merge history objects that match on artist, title, and path, summing their count properties'
    )
    return parser.parse_args()
def load_config(config_path: str) -> Dict[str, Any]:
    """Read and return the JSON configuration.

    Exits the whole process with status 1 when the file cannot be
    loaded, since the tool cannot run without its configuration.

    Args:
        config_path: Path to the JSON configuration file.

    Returns:
        The parsed configuration dictionary.
    """
    try:
        loaded = load_json_file(config_path)
        print(f"Configuration loaded from: {config_path}")
        return loaded
    except Exception as exc:
        # Fatal: no usable configuration means nothing else can proceed.
        print(f"Error loading configuration: {exc}")
        sys.exit(1)
def load_songs(input_path: str) -> List[Dict[str, Any]]:
    """Load the song library from *input_path*.

    Exits the process with status 1 on any failure (missing file, bad
    JSON, or a top-level value that is not a list).

    Args:
        input_path: Path to the songs JSON file.

    Returns:
        The list of song dictionaries.
    """
    try:
        print(f"Loading songs from: {input_path}")
        song_list = load_json_file(input_path)
        # The library must be a JSON array of song objects.
        if not isinstance(song_list, list):
            raise ValueError("Input file must contain a JSON array")
        print(f"Loaded {len(song_list):,} songs")
        return song_list
    except Exception as exc:
        print(f"Error loading songs: {exc}")
        sys.exit(1)
def main():
    """Main application entry point.

    Orchestrates the CLI: loads config and songs, then dispatches to one
    of three modes — favorites/history path updating, history merging, or
    the full duplicate analysis with report generation.  --process-all
    runs the favorites/history update AND the full analysis.
    """
    args = parse_arguments()
    # Load configuration
    config = load_config(args.config)
    # Override config with command line arguments
    if args.verbose:
        config['output']['verbose'] = True
    # Show configuration if requested, then exit early
    if args.show_config:
        reporter = ReportGenerator(config)
        reporter.print_report("config", config)
        return
    # Determine data directory and input file from config or args
    data_dir = args.output_dir or config.get('data_directory', '../data')
    # Resolve relative paths from CLI directory
    if not os.path.isabs(data_dir):
        data_dir = os.path.join(os.path.dirname(__file__), '..', data_dir)
    input_file = args.input or os.path.join(data_dir, 'songs.json')
    # Load songs only if needed: --merge-history works on history.json alone.
    songs = None
    matcher = None
    reporter = None
    if not args.merge_history:
        songs = load_songs(input_file)
        matcher = SongMatcher(config, data_dir)
        reporter = ReportGenerator(config)
    # Process favorites and history if requested
    if args.process_favorites or args.process_history or args.process_all:
        print("\n🎯 Processing favorites and history with priority logic...")
        print("=" * 60)
        # If --process-all is used, set both flags so both files are handled
        if args.process_all:
            args.process_favorites = True
            args.process_history = True
        process_favorites_and_history(matcher, songs, data_dir, args)
        print("\n" + "=" * 60)
        print("Favorites/History processing complete!")
        # If --process-all, fall through to the full duplicate analysis too;
        # otherwise favorites/history processing was the whole job.
        if args.process_all:
            print("\n🔄 Processing duplicates and generating reports...")
            print("=" * 60)
        else:
            return
    # Merge history objects if requested (separate, standalone operation)
    if args.merge_history:
        print("\n🔄 Merging history objects...")
        print("=" * 60)
        merge_history_objects(data_dir, args)
        print("\n" + "=" * 60)
        print("History merging complete!")
        return
    # If not processing favorites/history OR if --process-all, do the full analysis
    if not (args.process_favorites or args.process_history) or args.process_all:
        print("\nStarting song analysis...")
        print("=" * 60)
        # Process songs
        try:
            best_songs, skip_songs, stats = matcher.process_songs(songs)
            # Generate console reports
            print("\n" + "=" * 60)
            reporter.print_report("summary", stats)
            # Add channel priority report when priorities are configured
            if config.get('channel_priorities'):
                channel_report = reporter.generate_channel_priority_report(stats, config['channel_priorities'])
                print("\n" + channel_report)
            if config['output']['verbose']:
                duplicate_info = matcher.get_detailed_duplicate_info(songs)
                reporter.print_report("duplicates", duplicate_info)
                reporter.print_report("skip_summary", skip_songs)
            # Save skip list if not dry run
            if not args.dry_run and skip_songs:
                skip_list_path = os.path.join(data_dir, 'skipSongs.json')
                # Create simplified skip list (just paths and reasons) with deduplication
                seen_paths = set()
                simple_skip_list = []
                duplicate_count = 0
                for skip_song in skip_songs:
                    path = skip_song['path']
                    if path not in seen_paths:
                        seen_paths.add(path)
                        skip_entry = {'path': path}
                        if config['output']['include_reasons']:
                            skip_entry['reason'] = skip_song['reason']
                        simple_skip_list.append(skip_entry)
                    else:
                        duplicate_count += 1
                save_json_file(simple_skip_list, skip_list_path)
                print(f"\nSkip list saved to: {skip_list_path}")
                print(f"Total songs to skip: {len(simple_skip_list):,}")
                if duplicate_count > 0:
                    print(f"Removed {duplicate_count:,} duplicate entries from skip list")
            elif args.dry_run:
                print("\nDRY RUN MODE: No skip list generated")
            # Always generate detailed reports (not just when --save-reports is used)
            if not args.dry_run:
                reports_dir = os.path.join(data_dir, 'reports')
                os.makedirs(reports_dir, exist_ok=True)
                print(f"\n📊 Generating enhanced analysis reports...")
                # Analyze skip patterns
                skip_analysis = reporter.analyze_skip_patterns(skip_songs)
                # Analyze channel optimization
                channel_analysis = reporter.analyze_channel_optimization(stats, skip_analysis)
                # Generate and save enhanced reports
                enhanced_summary = reporter.generate_enhanced_summary_report(stats, skip_analysis)
                reporter.save_report_to_file(enhanced_summary, os.path.join(reports_dir, 'enhanced_summary_report.txt'))
                channel_optimization = reporter.generate_channel_optimization_report(channel_analysis)
                reporter.save_report_to_file(channel_optimization, os.path.join(reports_dir, 'channel_optimization_report.txt'))
                duplicate_patterns = reporter.generate_duplicate_pattern_report(skip_analysis)
                reporter.save_report_to_file(duplicate_patterns, os.path.join(reports_dir, 'duplicate_pattern_report.txt'))
                actionable_insights = reporter.generate_actionable_insights_report(stats, skip_analysis, channel_analysis)
                reporter.save_report_to_file(actionable_insights, os.path.join(reports_dir, 'actionable_insights_report.txt'))
                # Generate detailed duplicate analysis
                detailed_duplicates = reporter.generate_detailed_duplicate_analysis(skip_songs, best_songs)
                reporter.save_report_to_file(detailed_duplicates, os.path.join(reports_dir, 'detailed_duplicate_analysis.txt'))
                # Save original reports for backward compatibility
                summary_report = reporter.generate_summary_report(stats)
                reporter.save_report_to_file(summary_report, os.path.join(reports_dir, 'summary_report.txt'))
                skip_report = reporter.generate_skip_list_summary(skip_songs)
                reporter.save_report_to_file(skip_report, os.path.join(reports_dir, 'skip_list_summary.txt'))
                # Save detailed duplicate report only if verbose (it is large)
                if config['output']['verbose']:
                    duplicate_info = matcher.get_detailed_duplicate_info(songs)
                    duplicate_report = reporter.generate_duplicate_details(duplicate_info)
                    reporter.save_report_to_file(duplicate_report, os.path.join(reports_dir, 'duplicate_details.txt'))
                # Save analysis data as JSON for further processing
                analysis_data = {
                    'stats': stats,
                    'skip_analysis': skip_analysis,
                    'channel_analysis': channel_analysis,
                    'timestamp': __import__('datetime').datetime.now().isoformat()
                }
                save_json_file(analysis_data, os.path.join(reports_dir, 'analysis_data.json'))
                # Save full skip list data (this is what the web UI needs)
                save_json_file(skip_songs, os.path.join(reports_dir, 'skip_songs_detailed.json'))
                print(f"✅ Enhanced reports saved to: {reports_dir}")
                print(f"📋 Generated reports:")
                print(f"  • enhanced_summary_report.txt - Comprehensive analysis")
                print(f"  • channel_optimization_report.txt - Priority optimization suggestions")
                print(f"  • duplicate_pattern_report.txt - Duplicate pattern analysis")
                print(f"  • actionable_insights_report.txt - Recommendations and insights")
                print(f"  • detailed_duplicate_analysis.txt - Specific songs and their duplicates")
                print(f"  • analysis_data.json - Raw analysis data for further processing")
                print(f"  • skip_songs_detailed.json - Web UI data (always generated)")
            elif args.dry_run:
                print("\nDRY RUN MODE: No reports generated")
            print("\n" + "=" * 60)
            print("Analysis complete!")
        except Exception as e:
            # Any failure in analysis/reporting is fatal for a CLI run.
            print(f"\nError during processing: {e}")
            sys.exit(1)
# Run the CLI only when executed directly, not when imported as a module.
if __name__ == "__main__":
    main()