Signed-off-by: Matt Bruce <mbrucedogs@gmail.com>

This commit is contained in:
Matt Bruce 2025-08-01 08:03:31 -05:00
parent c9221a35b3
commit a538bcb7f5
2 changed files with 0 additions and 118 deletions

View File

@ -1,10 +0,0 @@
#!/usr/bin/env python3
"""
MusicBrainz Data Cleaner - Entry Point
Simple entry point that imports from the refactored src structure
"""
from src.cli.main import main
# Run the CLI only when this file is executed as a script, and hand main()'s
# return value to the interpreter as the process exit status.
# SystemExit is raised directly because the bare exit() helper is injected by
# the site module for interactive sessions and is not available when Python is
# started with -S.
if __name__ == "__main__":
    raise SystemExit(main())

View File

@ -1,108 +0,0 @@
#!/usr/bin/env python3
"""
Quick test script for 20 random songs
Simple single-threaded approach
"""
import sys
import json
import time
from pathlib import Path
# Add the project root (/app, the directory that contains the src package) to the import path
sys.path.insert(0, '/app')
from src.cli.main import MusicBrainzCleaner
def _failure_record(song, cleaned, artist_found, recording_found, error=None):
    """Build the report entry for a song that was not fully matched."""
    record = {
        'original': song,
        'cleaned': cleaned,
        'artist_found': artist_found,
        'recording_found': recording_found,
        'artist_name': song.get('artist', 'Unknown'),
        'title': song.get('title', 'Unknown'),
    }
    # The 'error' key exists only for songs whose lookup raised; the final
    # report uses its presence to decide whether to print an error line.
    if error is not None:
        record['error'] = error
    return record


def main(sample_size: int = 20, input_file='data/songs.json', cleaner=None) -> None:
    """Run a quick single-threaded lookup test on a random sample of songs.

    Loads the song list from ``input_file``, picks up to ``sample_size``
    random entries, passes each one to ``cleaner.clean_song`` and prints
    per-song progress followed by a summary (timing, artist/recording hit
    rates and the list of songs that were not fully matched).

    Args:
        sample_size: Number of songs to sample. Capped at the number of songs
            available, because ``random.sample`` raises ``ValueError`` when
            asked for more items than the population holds.
        input_file: Path of the JSON file holding the songs.
            NOTE(review): assumed to contain a JSON array of song objects
            with optional ``artist`` / ``title`` keys - confirm against the
            data file.
        cleaner: Object exposing ``clean_song(song)``. When omitted, a
            ``MusicBrainzCleaner`` is created.

    Returns:
        None. All results are reported on stdout.
    """
    import random

    print(f'🚀 Starting quick test with {sample_size} random songs...')

    # Load songs
    input_file = Path(input_file)
    if not input_file.exists():
        print(f'❌ {input_file.name} not found')
        return

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(input_file, 'r', encoding='utf-8') as f:
        all_songs = json.load(f)
    print(f'📊 Total songs available: {len(all_songs):,}')

    # Never request more songs than exist, and bail out before any division
    # by the sample size when there is nothing to test.
    total = min(sample_size, len(all_songs))
    if total <= 0:
        print('❌ No songs available to test')
        return
    sample_songs = random.sample(all_songs, total)
    print(f'🎯 Testing {total} random songs...')

    # Initialize cleaner
    if cleaner is None:
        cleaner = MusicBrainzCleaner()

    # Process songs
    found_artists = 0
    found_recordings = 0
    failed_songs = []
    index_width = len(str(total))

    start_time = time.perf_counter()
    for i, song in enumerate(sample_songs, 1):
        artist = song.get('artist', 'Unknown')
        title = song.get('title', 'Unknown')
        print(f' [{i:{index_width}d}/{total}] Processing: "{artist}" - "{title}"')
        try:
            result = cleaner.clean_song(song)
            # NOTE(review): assumes clean_song returns the cleaned song as a
            # mapping that gains 'mbid' / 'recording_mbid' keys on a match -
            # confirm against MusicBrainzCleaner.
            artist_found = 'mbid' in result
            recording_found = 'recording_mbid' in result
        except Exception as e:
            # Best-effort test run: one failing lookup must not abort the
            # remaining songs, so record the error and move on.
            print(f' 💥 Error: {e}')
            failed_songs.append(
                _failure_record(song, {'error': str(e)}, False, False, error=str(e))
            )
            continue

        # Count each kind of match independently so that an artist-only hit
        # still shows up in the artist statistics.
        if artist_found:
            found_artists += 1
        if recording_found:
            found_recordings += 1

        if artist_found and recording_found:
            print(' ✅ Found both artist and recording')
        else:
            failed_songs.append(
                _failure_record(song, result, artist_found, recording_found)
            )
            print(f' ❌ Artist: {artist_found}, Recording: {recording_found}')
    processing_time = time.perf_counter() - start_time

    # Calculate success rates
    artist_success_rate = found_artists / total * 100
    recording_success_rate = found_recordings / total * 100
    failed_rate = len(failed_songs) / total * 100

    print('\n📊 Final Results:')
    print(f' ⏱️ Processing time: {processing_time:.2f} seconds')
    # A zero elapsed time would make the throughput undefined; skip the line.
    if processing_time > 0:
        print(f' 🚀 Speed: {total / processing_time:.1f} songs/second')
    print(f' ✅ Artists found: {found_artists}/{total} ({artist_success_rate:.1f}%)')
    print(f' ✅ Recordings found: {found_recordings}/{total} ({recording_success_rate:.1f}%)')
    print(f' ❌ Failed songs: {len(failed_songs)} ({failed_rate:.1f}%)')

    # Show failed songs
    if failed_songs:
        print('\n🔍 Failed songs:')
        for i, failed in enumerate(failed_songs, 1):
            print(f' [{i}] "{failed["artist_name"]}" - "{failed["title"]}"')
            print(f' Artist found: {failed["artist_found"]}, Recording found: {failed["recording_found"]}')
            if 'error' in failed:
                print(f' Error: {failed["error"]}')
    else:
        print('\n🎉 All songs processed successfully!')
# Script entry point: run the quick test only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    main()