KaraokeMerge/cli/main.py

#!/usr/bin/env python3
"""
Main CLI application for the Karaoke Song Library Cleanup Tool.
"""
import argparse
import sys
import os
from typing import Dict, List, Any

# Add the cli directory to the path for imports
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from utils import load_json_file, save_json_file
from matching import SongMatcher
from report import ReportGenerator


def parse_arguments(argv=None) -> argparse.Namespace:
    """Parse command line arguments.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which is the
            behaviour existing callers rely on. Passing an explicit list
            lets the parser be driven programmatically.

    Returns:
        The parsed ``argparse.Namespace`` with the attributes ``config``,
        ``input``, ``output_dir``, ``verbose``, ``dry_run``,
        ``save_reports`` and ``show_config``.
    """
    parser = argparse.ArgumentParser(
        description="Karaoke Song Library Cleanup Tool",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python main.py                                    # Run with default settings (generates reports automatically)
  python main.py --verbose                          # Enable verbose output
  python main.py --config custom_config.json        # Use custom config
  python main.py --output-dir ./reports             # Save reports to custom directory
  python main.py --dry-run                          # Analyze without generating files
        """
    )

    # Options that take a value: (flag, default, help text).
    parser.add_argument(
        '--config',
        default='config/config.json',
        help='Path to configuration file (default: config/config.json)'
    )

    parser.add_argument(
        '--input',
        default='data/allSongs.json',
        help='Path to input songs file (default: data/allSongs.json)'
    )

    parser.add_argument(
        '--output-dir',
        default='data',
        help='Directory for output files (default: data)'
    )

    # Boolean switches; each defaults to False and becomes True when given.
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Enable verbose output'
    )

    parser.add_argument(
        '--dry-run',
        action='store_true',
        # A dry run suppresses every file written by main(), not only the
        # skip list, so the help text says so.
        help='Analyze songs without writing the skip list or report files'
    )

    # Kept for backward compatibility: the flag is still accepted, but the
    # reports are produced whether or not it is passed.
    parser.add_argument(
        '--save-reports',
        action='store_true',
        help='Save detailed reports to files (now always enabled by default)'
    )

    parser.add_argument(
        '--show-config',
        action='store_true',
        help='Show current configuration and exit'
    )

    return parser.parse_args(argv)


def load_config(config_path: str) -> Dict[str, Any]:
    """Load the configuration file and check that it is a JSON object.

    Args:
        config_path: Path of the JSON configuration file to read.

    Returns:
        The parsed configuration as a dictionary.

    Any failure (unreadable file, invalid JSON, or a top-level value that
    is not an object) is reported on stdout and terminates the process with
    exit status 1 instead of propagating an exception.
    """
    try:
        config = load_json_file(config_path)

        # Callers index the result by key (e.g. config['output']), so reject
        # any other JSON type here, mirroring the list check in load_songs().
        if not isinstance(config, dict):
            raise ValueError("Configuration file must contain a JSON object")

        print(f"Configuration loaded from: {config_path}")
        return config
    except Exception as e:
        print(f"Error loading configuration: {e}")
        sys.exit(1)


def load_songs(input_path: str) -> List[Dict[str, Any]]:
    """Read the song library from *input_path*.

    The file must hold a JSON array; its length is printed once loaded.
    Any error while reading or validating is printed and the process exits
    with status 1.
    """
    try:
        print(f"Loading songs from: {input_path}")
        loaded = load_json_file(input_path)

        # Happy path first: a list is the only accepted top-level type.
        if isinstance(loaded, list):
            song_count = len(loaded)
            print(f"Loaded {song_count:,} songs")
            return loaded

        raise ValueError("Input file must contain a JSON array")
    except Exception as exc:
        print(f"Error loading songs: {exc}")
        sys.exit(1)


def _build_simple_skip_list(skip_songs: List[Dict[str, Any]], include_reasons: bool) -> tuple:
    """Reduce detailed skip records to one entry per unique path.

    Args:
        skip_songs: Skip records; each must have a ``'path'`` key and, when
            *include_reasons* is true, a ``'reason'`` key.
        include_reasons: Whether to copy ``'reason'`` into each entry.

    Returns:
        ``(entries, duplicate_count)`` where *entries* keeps the first
        record seen for every path (in input order) and *duplicate_count*
        is the number of later records dropped for repeating a path.
    """
    seen_paths = set()
    entries = []
    duplicate_count = 0

    for skip_song in skip_songs:
        path = skip_song['path']
        if path in seen_paths:
            duplicate_count += 1
            continue

        seen_paths.add(path)
        entry = {'path': path}
        if include_reasons:
            entry['reason'] = skip_song['reason']
        entries.append(entry)

    return entries, duplicate_count


def _save_reports(reporter, reports_dir, stats, skip_songs, best_songs, duplicate_info, verbose):
    """Generate every report file and JSON data dump inside *reports_dir*.

    Args:
        reporter: The ReportGenerator used to build and save the reports.
        reports_dir: Existing directory that receives the files.
        stats: Statistics returned by ``matcher.process_songs``.
        skip_songs: Detailed skip records returned by ``process_songs``.
        best_songs: Best-song records returned by ``process_songs``.
        duplicate_info: Result of ``matcher.get_detailed_duplicate_info``;
            only used when *verbose* is true.
        verbose: When true, ``duplicate_details.txt`` is written as well.
    """
    # Local import: datetime is needed only for the timestamp below.
    from datetime import datetime

    def save_text(content, filename):
        reporter.save_report_to_file(content, os.path.join(reports_dir, filename))

    print("\n📊 Generating enhanced analysis reports...")

    # Both analyses feed several of the reports below.
    skip_analysis = reporter.analyze_skip_patterns(skip_songs)
    channel_analysis = reporter.analyze_channel_optimization(stats, skip_analysis)

    # Enhanced reports.
    save_text(reporter.generate_enhanced_summary_report(stats, skip_analysis),
              'enhanced_summary_report.txt')
    save_text(reporter.generate_channel_optimization_report(channel_analysis),
              'channel_optimization_report.txt')
    save_text(reporter.generate_duplicate_pattern_report(skip_analysis),
              'duplicate_pattern_report.txt')
    save_text(reporter.generate_actionable_insights_report(stats, skip_analysis, channel_analysis),
              'actionable_insights_report.txt')
    save_text(reporter.generate_detailed_duplicate_analysis(skip_songs, best_songs),
              'detailed_duplicate_analysis.txt')

    # Original reports, still written for compatibility.
    save_text(reporter.generate_summary_report(stats), 'summary_report.txt')
    save_text(reporter.generate_skip_list_summary(skip_songs), 'skip_list_summary.txt')

    if verbose:
        save_text(reporter.generate_duplicate_details(duplicate_info), 'duplicate_details.txt')

    # Raw analysis data as JSON for further processing.
    analysis_data = {
        'stats': stats,
        'skip_analysis': skip_analysis,
        'channel_analysis': channel_analysis,
        'timestamp': datetime.now().isoformat()
    }
    save_json_file(analysis_data, os.path.join(reports_dir, 'analysis_data.json'))

    # Full skip list data (this is what the web UI needs).
    save_json_file(skip_songs, os.path.join(reports_dir, 'skip_songs_detailed.json'))

    print(f"✅ Enhanced reports saved to: {reports_dir}")
    print("📋 Generated reports:")
    print("   • enhanced_summary_report.txt - Comprehensive analysis")
    print("   • channel_optimization_report.txt - Priority optimization suggestions")
    print("   • duplicate_pattern_report.txt - Duplicate pattern analysis")
    print("   • actionable_insights_report.txt - Recommendations and insights")
    print("   • detailed_duplicate_analysis.txt - Specific songs and their duplicates")
    print("   • analysis_data.json - Raw analysis data for further processing")
    print("   • skip_songs_detailed.json - Web UI data (always generated)")


def main():
    """Main application entry point.

    Flow: parse the command line, load the configuration, optionally print
    it and return (``--show-config``), load the songs, run
    ``SongMatcher.process_songs`` and print the summary reports. Unless
    ``--dry-run`` is given, the de-duplicated skip list is written to
    ``<output-dir>/skipSongs.json`` (only when there are songs to skip) and
    the report files are written to ``<output-dir>/reports``.

    Any exception raised during processing or reporting is printed and the
    process exits with status 1.
    """
    args = parse_arguments()

    # Load configuration
    config = load_config(args.config)

    # Make sure the 'output' section exists so that --verbose with a config
    # lacking that section no longer fails with an uncaught KeyError.
    output_cfg = config.setdefault('output', {})

    # Override config with command line arguments
    if args.verbose:
        output_cfg['verbose'] = True
    # A missing 'verbose' key means "not verbose", matching the opt-in flag.
    verbose = bool(output_cfg.get('verbose', False))

    # Show configuration if requested
    if args.show_config:
        reporter = ReportGenerator(config)
        reporter.print_report("config", config)
        return

    # Load songs
    songs = load_songs(args.input)

    # Initialize components
    data_dir = args.output_dir
    matcher = SongMatcher(config, data_dir)
    reporter = ReportGenerator(config)

    print("\nStarting song analysis...")
    print("=" * 60)

    # Process songs
    try:
        best_songs, skip_songs, stats = matcher.process_songs(songs)

        # Console reports
        print("\n" + "=" * 60)
        reporter.print_report("summary", stats)

        # Add channel priority report
        if config.get('channel_priorities'):
            channel_report = reporter.generate_channel_priority_report(stats, config['channel_priorities'])
            print("\n" + channel_report)

        # Computed once and reused for the saved duplicate_details report.
        # NOTE(review): assumes get_detailed_duplicate_info returns the same
        # result for the same songs; it used to be called twice with
        # identical arguments.
        duplicate_info = None
        if verbose:
            duplicate_info = matcher.get_detailed_duplicate_info(songs)
            reporter.print_report("duplicates", duplicate_info)

        reporter.print_report("skip_summary", skip_songs)

        if args.dry_run:
            print("\nDRY RUN MODE: No skip list generated")
            print("\nDRY RUN MODE: No reports generated")
        else:
            # Create the reports directory (and with it the output directory)
            # before anything is written; save_json_file is not assumed to
            # create missing directories.
            reports_dir = os.path.join(args.output_dir, 'reports')
            os.makedirs(reports_dir, exist_ok=True)

            # Save skip list only when there is something to skip
            if skip_songs:
                skip_list_path = os.path.join(args.output_dir, 'skipSongs.json')

                # Simplified skip list (just paths and reasons), deduplicated
                simple_skip_list, duplicate_count = _build_simple_skip_list(
                    skip_songs, config['output']['include_reasons']
                )

                save_json_file(simple_skip_list, skip_list_path)
                print(f"\nSkip list saved to: {skip_list_path}")
                print(f"Total songs to skip: {len(simple_skip_list):,}")
                if duplicate_count > 0:
                    print(f"Removed {duplicate_count:,} duplicate entries from skip list")

            # Always generate detailed reports (not just when --save-reports is used)
            _save_reports(reporter, reports_dir, stats, skip_songs, best_songs,
                          duplicate_info, verbose)

        print("\n" + "=" * 60)
        print("Analysis complete!")

    except Exception as e:
        print(f"\nError during processing: {e}")
        sys.exit(1)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()