KaraokeMerge/web/app.py

345 lines
12 KiB
Python

#!/usr/bin/env python3
"""
Web UI for Karaoke Song Library Cleanup Tool
Provides interactive interface for reviewing duplicates and making decisions.
"""
from flask import Flask, render_template, jsonify, request, send_from_directory
import json
import os
from typing import Dict, List, Any
from datetime import datetime
# Flask application object; the @app.route decorators in this file register
# their handlers on it.
app = Flask(__name__)
# Configuration
# NOTE: these are relative paths, so they are resolved against the process's
# current working directory at the time a file is opened, not against the
# location of this source file.
DATA_DIR = '../data'
# Sub-directory of DATA_DIR holding generated reports. The handlers visible in
# this file rebuild the same path with os.path.join(DATA_DIR, 'reports', ...)
# instead of referencing this constant.
REPORTS_DIR = os.path.join(DATA_DIR, 'reports')
# JSON configuration file returned as-is by the /api/config endpoint.
CONFIG_FILE = '../config/config.json'
def load_json_file(file_path: str) -> Any:
    """Read and decode a UTF-8 JSON file without ever raising.

    Args:
        file_path: Location of the JSON document to read.

    Returns:
        The decoded JSON value, or ``None`` when the file cannot be opened or
        parsed. In the failure case the error is printed to stdout.
    """
    parsed = None
    try:
        with open(file_path, 'r', encoding='utf-8') as source:
            parsed = json.loads(source.read())
    except Exception as exc:
        # Best-effort loader: callers treat ``None`` as "no data available".
        print(f"Error loading {file_path}: {exc}")
    return parsed
def get_duplicate_groups(skip_songs: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Group skip-list entries by artist/title so duplicates appear together.

    Args:
        skip_songs: Skip-list entries. Each entry must have a ``'path'`` key;
            ``'artist'``, ``'title'``, ``'kept_version'`` and ``'reason'`` are
            optional.

    Returns:
        One dict per distinct ``"artist - title"`` key with the keys
        ``'artist'``, ``'title'``, ``'kept_version'`` (taken from the first
        entry seen for that key), ``'skipped_versions'`` (list of dicts with
        ``'path'``, ``'reason'``, ``'file_type'``, ``'channel'``) and
        ``'total_duplicates'``. The list is sorted case-insensitively by
        artist, then title.

    Raises:
        KeyError: If an entry has no ``'path'`` key.
    """
    def text_or_unknown(entry: Dict[str, Any], field: str) -> Any:
        # ``dict.get(field, 'Unknown')`` still yields None when the key is
        # present with a JSON null (manual skip entries can be saved that
        # way), which would later break the ``.lower()`` calls in the sort.
        value = entry.get(field)
        return 'Unknown' if value is None else value

    duplicate_groups: Dict[str, Dict[str, Any]] = {}
    for skip_song in skip_songs:
        artist = text_or_unknown(skip_song, 'artist')
        title = text_or_unknown(skip_song, 'title')
        key = f"{artist} - {title}"

        group = duplicate_groups.get(key)
        if group is None:
            group = duplicate_groups[key] = {
                'artist': artist,
                'title': title,
                'kept_version': text_or_unknown(skip_song, 'kept_version'),
                'skipped_versions': [],
                'total_duplicates': 0,
            }

        path = skip_song['path']
        group['skipped_versions'].append({
            'path': path,
            'reason': skip_song.get('reason', 'duplicate'),
            'file_type': get_file_type(path),
            'channel': extract_channel(path),
        })
        group['total_duplicates'] = len(group['skipped_versions'])

    # Sort by artist first, then by title (both case-insensitive).
    return sorted(
        duplicate_groups.values(),
        key=lambda g: (g['artist'].lower(), g['title'].lower()),
    )
def get_file_type(path: str) -> str:
    """Classify a song path by its file extension (case-insensitive).

    Args:
        path: File path or file name to inspect.

    Returns:
        ``'MP4'`` for ``.mp4`` files, ``'MP3'`` for ``.mp3`` and ``.cdg``
        files, and ``'Unknown'`` for anything else.
    """
    lowered = path.lower()
    # CDG files are reported as MP3 since they're paired with an MP3 track.
    suffix_table = (
        ('MP4', ('.mp4',)),
        ('MP3', ('.mp3', '.cdg')),
    )
    for label, suffixes in suffix_table:
        if lowered.endswith(suffixes):
            return label
    return 'Unknown'
def extract_channel(path: str) -> str:
    """Derive a channel (source folder) name from a song file path.

    Resolution order:

    1. A known channel name appearing anywhere in the path
       (case-insensitive) returns that channel's canonical spelling.
    2. The folder directly below the first ``MP4`` folder, or ``'MP4 Root'``
       when the file sits directly inside ``MP4``.
    3. The first folder whose name contains ``karaoke``.
    4. The folder containing the file.
    5. ``'Unknown'`` when the path has no folder component.

    Args:
        path: File path using backslash and/or forward-slash separators.

    Returns:
        The channel name as described above.
    """
    path_lower = path.lower()

    # Look for specific known channels first.
    known_channels = ('Sing King Karaoke', 'KaraFun Karaoke', 'Stingray Karaoke')
    for channel in known_channels:
        if channel.lower() in path_lower:
            return channel

    # Accept both Windows and POSIX separators.
    parts = path.replace('/', '\\').split('\\')
    last_index = len(parts) - 1
    folders = parts[:-1]  # every component except the final (file) one

    # MP4 folder structure: MP4/ChannelName/song.mp4
    for i, part in enumerate(folders):
        if part.lower() != 'mp4':
            continue
        next_part = parts[i + 1]
        # The component after MP4 is a folder when further components follow
        # it. The dot test is only a fallback for the final component, so
        # folder names containing a dot (e.g. "Vol. 2") are not mistaken for
        # a file name.
        is_folder = i + 1 < last_index or '.' not in next_part
        return next_part if is_folder else 'MP4 Root'

    # Fallback: any *folder* mentioning 'karaoke'. The file name is excluded
    # on purpose, otherwise "Song (Karaoke Version).mp3" would be reported
    # as its own channel.
    for part in folders:
        if 'karaoke' in part.lower():
            return part

    # Otherwise use the folder containing the file. It cannot be 'MP4' here,
    # because the MP4 loop above would already have returned.
    if folders:
        return folders[-1]

    return 'Unknown'
@app.route('/')
def index():
    """Serve the dashboard landing page rendered from ``index.html``."""
    dashboard_page = render_template('index.html')
    return dashboard_page
def _version_matches_filters(channel, file_type, channel_filter, file_type_filter):
    """Return True when one version passes both (optional) substring filters."""
    channel_ok = not channel_filter or channel_filter in channel.lower()
    file_type_ok = not file_type_filter or file_type_filter in file_type.lower()
    return channel_ok and file_type_ok


@app.route('/api/duplicates')
def get_duplicates():
    """API endpoint returning a filtered, paginated list of duplicate groups.

    Query parameters (all optional):
        artist, title: Case-insensitive substring filters on the group.
        channel, file_type: Case-insensitive substring filters; a group
            passes when its kept version or any skipped version matches both.
        min_duplicates: Minimum number of skipped versions (default 0).
        page: 1-based page number (default 1, values below 1 become 1).
        per_page: Page size (default 50, values below 1 become 1).

    Non-numeric values for the numeric parameters fall back to their
    defaults instead of failing the request.

    Returns:
        JSON with ``duplicates``, ``total``, ``page``, ``per_page`` and
        ``total_pages``; or a JSON error with HTTP 404 when no skip-song data
        can be loaded.
    """
    # Try to load detailed skip songs first, fall back to the basic skip list.
    skip_songs = load_json_file(os.path.join(DATA_DIR, 'reports', 'skip_songs_detailed.json'))
    if not skip_songs:
        skip_songs = load_json_file(os.path.join(DATA_DIR, 'skipSongs.json'))
    if not skip_songs:
        return jsonify({'error': 'No skip songs data found'}), 404

    duplicate_groups = get_duplicate_groups(skip_songs)

    artist_filter = request.args.get('artist', '').lower()
    title_filter = request.args.get('title', '').lower()
    channel_filter = request.args.get('channel', '').lower()
    file_type_filter = request.args.get('file_type', '').lower()
    # ``type=int`` makes the lookup return the default when conversion fails,
    # so a malformed query string no longer produces an HTTP 500.
    min_duplicates = request.args.get('min_duplicates', 0, type=int)

    filtered_groups = []
    for group in duplicate_groups:
        if artist_filter and artist_filter not in group['artist'].lower():
            continue
        if title_filter and title_filter not in group['title'].lower():
            continue
        if group['total_duplicates'] < min_duplicates:
            continue

        # A group passes when any version (kept or skipped) matches the
        # channel/file_type filters.
        if channel_filter or file_type_filter:
            # Entries may carry a null kept_version; 'Unknown' classifies the
            # same way as the existing missing-value placeholder.
            kept_path = group['kept_version'] or 'Unknown'
            matches_filter = _version_matches_filters(
                extract_channel(kept_path),
                get_file_type(kept_path),
                channel_filter,
                file_type_filter,
            ) or any(
                _version_matches_filters(
                    version['channel'],
                    version['file_type'],
                    channel_filter,
                    file_type_filter,
                )
                for version in group['skipped_versions']
            )
            if not matches_filter:
                continue

        filtered_groups.append(group)

    # Pagination. Clamp both values to at least 1: per_page=0 would divide by
    # zero below, and page<=0 would produce a negative slice start.
    page = max(1, request.args.get('page', 1, type=int))
    per_page = max(1, request.args.get('per_page', 50, type=int))
    start_idx = (page - 1) * per_page
    paginated_groups = filtered_groups[start_idx:start_idx + per_page]

    return jsonify({
        'duplicates': paginated_groups,
        'total': len(filtered_groups),
        'page': page,
        'per_page': per_page,
        # Ceiling division without floats.
        'total_pages': (len(filtered_groups) + per_page - 1) // per_page
    })
@app.route('/api/stats')
def get_stats():
    """API endpoint returning overall statistics about the skip list.

    Returns:
        JSON with ``total_songs``, ``total_duplicates`` (number of duplicate
        groups), ``total_files_to_skip``, ``total_remaining``,
        ``total_file_types``, ``skip_file_types``, ``remaining_file_types``,
        ``channels`` (version count per channel, kept versions included) and
        ``most_duplicated`` (the ten groups with the most skipped versions);
        or a JSON error with HTTP 404 when no skip-song data can be loaded.
    """
    # Try to load detailed skip songs first, fall back to the basic skip list.
    skip_songs = load_json_file(os.path.join(DATA_DIR, 'reports', 'skip_songs_detailed.json'))
    if not skip_songs:
        skip_songs = load_json_file(os.path.join(DATA_DIR, 'skipSongs.json'))
    if not skip_songs:
        return jsonify({'error': 'No skip songs data found'}), 404

    # Original full song list, used for the total counts. Missing data is
    # treated as an empty library.
    all_songs = load_json_file(os.path.join(DATA_DIR, 'allSongs.json')) or []

    duplicate_groups = get_duplicate_groups(skip_songs)

    total_duplicates = len(duplicate_groups)
    total_files_to_skip = len(skip_songs)

    # File type breakdown for skipped files, plus per-channel version counts.
    skip_file_types = {'MP4': 0, 'MP3': 0}
    channels = {}
    for group in duplicate_groups:
        # Include the kept version in the channel stats. A null kept_version
        # is classified like the 'Unknown' placeholder.
        kept_channel = extract_channel(group['kept_version'] or 'Unknown')
        channels[kept_channel] = channels.get(kept_channel, 0) + 1

        for version in group['skipped_versions']:
            file_type = version['file_type']
            # get_file_type() may return 'Unknown'; only tally the tracked
            # types, mirroring how the all-songs totals are counted below.
            if file_type in skip_file_types:
                skip_file_types[file_type] += 1
            channel = version['channel']
            channels[channel] = channels.get(channel, 0) + 1

    # Total file type breakdown across the whole library.
    total_file_types = {'MP4': 0, 'MP3': 0}
    total_songs = len(all_songs)
    for song in all_songs:
        file_type = get_file_type(song.get('path') or '')
        if file_type in total_file_types:
            total_file_types[file_type] += 1

    # What will remain after skipping.
    remaining_file_types = {
        file_type: total_file_types[file_type] - skip_file_types[file_type]
        for file_type in ('MP4', 'MP3')
    }
    total_remaining = sum(remaining_file_types.values())

    # Most duplicated songs (top ten by number of skipped versions).
    most_duplicated = sorted(
        duplicate_groups,
        key=lambda g: g['total_duplicates'],
        reverse=True,
    )[:10]

    return jsonify({
        'total_songs': total_songs,
        'total_duplicates': total_duplicates,
        'total_files_to_skip': total_files_to_skip,
        'total_remaining': total_remaining,
        'total_file_types': total_file_types,
        'skip_file_types': skip_file_types,
        'remaining_file_types': remaining_file_types,
        'channels': channels,
        'most_duplicated': most_duplicated
    })
@app.route('/api/config')
def get_config():
    """API endpoint exposing the configuration file's contents as JSON.

    Responds with an empty JSON object when the configuration cannot be
    loaded or is empty.
    """
    loaded = load_json_file(CONFIG_FILE)
    if not loaded:
        loaded = {}
    return jsonify(loaded)
@app.route('/api/save-changes', methods=['POST'])
def save_changes():
    """API endpoint that applies user changes to the detailed skip list.

    Expects a JSON object of the form ``{"changes": [...]}`` where each change
    is an object with:

        type: ``'keep_file'`` removes ``file_path`` from the skip list;
            ``'skip_file'`` adds it with reason ``'manual_skip'``.
        file_path: Path of the affected file. Changes without one are ignored.
        artist, title, kept_version: Stored on entries added by ``'skip_file'``.

    The current skip list file is copied to a timestamped backup before it is
    overwritten.

    Returns:
        JSON ``{'success': True, 'message': ..., 'total_files': ...}`` on
        success; a JSON error with HTTP 400 for a malformed request body, 404
        when the skip list cannot be loaded, or 500 on any other failure.
    """
    import shutil

    # silent=True returns None for a missing or invalid JSON body instead of
    # raising, so a bad request is reported as 400 rather than 500.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400
    changes = payload.get('changes', [])
    if not isinstance(changes, list):
        return jsonify({'error': "'changes' must be a list"}), 400

    detailed_path = os.path.join(DATA_DIR, 'reports', 'skip_songs_detailed.json')
    try:
        # Load current skip list. An empty list is valid (every file was
        # kept); only a missing or unreadable file counts as "not found".
        skip_songs = load_json_file(detailed_path)
        if not isinstance(skip_songs, list):
            return jsonify({'error': 'No skip songs data found'}), 404

        # Apply changes in the order given.
        for change in changes:
            if not isinstance(change, dict):
                continue
            change_type = change.get('type')
            file_path = change.get('file_path')
            if not file_path:
                # Without a path there is nothing to keep or skip, and a
                # null path stored in the list would break later reads.
                continue

            if change_type == 'keep_file':
                # Remove this file from the skip list.
                skip_songs = [s for s in skip_songs if s.get('path') != file_path]
            elif change_type == 'skip_file':
                # Add this file to the skip list unless it is already there.
                if any(s.get('path') == file_path for s in skip_songs):
                    continue
                skip_songs.append({
                    'path': file_path,
                    'reason': 'manual_skip',
                    'artist': change.get('artist'),
                    'title': change.get('title'),
                    'kept_version': change.get('kept_version')
                })

        # Back up the existing file, then save the updated skip list.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        backup_path = os.path.join(DATA_DIR, 'reports', f'skip_songs_backup_{timestamp}.json')
        shutil.copy2(detailed_path, backup_path)

        with open(detailed_path, 'w', encoding='utf-8') as f:
            json.dump(skip_songs, f, indent=2, ensure_ascii=False)

        return jsonify({
            'success': True,
            'message': f'Changes saved successfully. Backup created at: {backup_path}',
            'total_files': len(skip_songs)
        })
    except Exception as e:
        # Endpoint boundary: report any unexpected failure to the client.
        return jsonify({'error': f'Error saving changes: {str(e)}'}), 500
@app.route('/api/artists')
def get_artists():
    """API endpoint listing every artist together with their duplicate groups.

    Returns:
        JSON with ``artists`` (one object per artist holding ``name``,
        ``songs`` and ``total_duplicates``, sorted case-insensitively by
        name) and ``total_artists``; or a JSON error with HTTP 404 when the
        detailed skip list cannot be loaded or is empty.
    """
    detailed_path = os.path.join(DATA_DIR, 'reports', 'skip_songs_detailed.json')
    skip_songs = load_json_file(detailed_path)
    if not skip_songs:
        return jsonify({'error': 'No skip songs data found'}), 404

    # Bucket the duplicate groups by artist, accumulating the totals.
    by_artist = {}
    for group in get_duplicate_groups(skip_songs):
        name = group['artist']
        bucket = by_artist.get(name)
        if bucket is None:
            bucket = by_artist[name] = {
                'name': name,
                'songs': [],
                'total_duplicates': 0
            }
        bucket['songs'].append(group)
        bucket['total_duplicates'] += group['total_duplicates']

    # Order the artists by name, ignoring case.
    artists_list = sorted(by_artist.values(), key=lambda entry: entry['name'].lower())

    return jsonify({
        'artists': artists_list,
        'total_artists': len(artists_list)
    })
if __name__ == '__main__':
    # NOTE(review): debug=True together with host='0.0.0.0' looks like it
    # exposes Flask's interactive debugger on every network interface.
    # Confirm this entry point is only used on a trusted network.
    run_options = {'debug': True, 'host': '0.0.0.0', 'port': 5000}
    app.run(**run_options)