Signed-off-by: Matt Bruce <mbrucedogs@gmail.com>
This commit is contained in:
parent
9124640bf4
commit
c9221a35b3
@ -10,9 +10,24 @@ src/tests/
|
|||||||
├── integration/ # Integration tests for database and API
|
├── integration/ # Integration tests for database and API
|
||||||
├── debug/ # Debug scripts and troubleshooting tests
|
├── debug/ # Debug scripts and troubleshooting tests
|
||||||
├── run_tests.py # Test runner script
|
├── run_tests.py # Test runner script
|
||||||
└── README.md # This file
|
├── README.md # This file
|
||||||
|
├── legacy/ # Legacy scripts moved from root directory
|
||||||
|
└── moved/ # Test files moved from root directory
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Legacy Scripts (Moved from Root)
|
||||||
|
- `process_full_dataset.py` - Legacy script that redirects to the new CLI
|
||||||
|
- `musicbrainz_cleaner.py` - Legacy entry point script
|
||||||
|
|
||||||
|
### Moved Test Files (Moved from Root)
|
||||||
|
- `test_title_cleaning.py` - Test title cleaning functionality
|
||||||
|
- `test_simple_query.py` - Test simple database queries
|
||||||
|
- `debug_artist_search.py` - Debug artist search functionality
|
||||||
|
- `test_failed_collaborations.py` - Test failed collaboration cases
|
||||||
|
- `test_collaboration_debug.py` - Debug collaboration parsing
|
||||||
|
- `test_100_random.py` - Test 100 random songs
|
||||||
|
- `quick_test_20.py` - Quick test with 20 songs
|
||||||
|
|
||||||
## 🧪 Test Categories
|
## 🧪 Test Categories
|
||||||
|
|
||||||
### Unit Tests (`unit/`)
|
### Unit Tests (`unit/`)
|
||||||
@ -44,6 +59,39 @@ src/tests/
|
|||||||
python3 src/tests/run_tests.py
|
python3 src/tests/run_tests.py
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Running Moved Test Files
|
||||||
|
|
||||||
|
The following test files were moved from the root directory to `src/tests/`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Run individual moved test files
|
||||||
|
python3 src/tests/test_100_random.py
|
||||||
|
python3 src/tests/quick_test_20.py
|
||||||
|
python3 src/tests/test_title_cleaning.py
|
||||||
|
python3 src/tests/test_simple_query.py
|
||||||
|
python3 src/tests/debug_artist_search.py
|
||||||
|
python3 src/tests/test_failed_collaborations.py
|
||||||
|
python3 src/tests/test_collaboration_debug.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Running Legacy Scripts
|
||||||
|
|
||||||
|
Legacy scripts that redirect to the new CLI:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Legacy full dataset processing (redirects to CLI)
|
||||||
|
python3 src/tests/process_full_dataset.py
|
||||||
|
|
||||||
|
# Legacy entry point (redirects to CLI)
|
||||||
|
python3 src/tests/musicbrainz_cleaner.py
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note**: These legacy scripts are kept for backward compatibility, but the new CLI is the preferred way to run the cleaner:
|
||||||
|
```bash
|
||||||
|
# Preferred method (new CLI)
|
||||||
|
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main
|
||||||
|
```
|
||||||
|
|
||||||
### Run Specific Test Categories
|
### Run Specific Test Categories
|
||||||
```bash
|
```bash
|
||||||
# Run only unit tests
|
# Run only unit tests
|
||||||
|
|||||||
40
src/tests/debug_artist_search.py
Normal file
40
src/tests/debug_artist_search.py
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Debug script for individual artist searches
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0, '/app')
|
||||||
|
|
||||||
|
from src.api.database import MusicBrainzDatabase
|
||||||
|
|
||||||
|
def test_artist_search():
    """Print the fuzzy-search outcome for a fixed list of artist names.

    Each name is passed to ``MusicBrainzDatabase.fuzzy_search_artist``. A
    truthy result is unpacked as ``(name, mbid, score)`` and printed; a falsy
    result is reported as not found. Any exception raised for one name is
    printed and the loop moves on to the next name.
    """
    database = MusicBrainzDatabase()

    # Main artists taken from the collaboration strings under investigation.
    queries = (
        "Kanye",  # From "ft Jamie Foxx West, Kanye"
        "Kanye West",  # What it should be
        "Ariana Grande",  # From "Ariana Grande, Normani, Nicki Minaj"
        "SZA",  # From "SZA, Justin Bieber"
    )

    print("🔍 Testing Individual Artist Searches")
    print("=" * 50)

    for query in queries:
        print(f"\n📝 Searching for: '{query}'")

        try:
            match = database.fuzzy_search_artist(query)
            if not match:
                print(f" ❌ Not found")
                continue

            found_name, found_mbid, found_score = match
            print(f" ✅ Found: '{found_name}' (MBID: {found_mbid}, Score: {found_score})")
        except Exception as exc:
            # Debug script: report the failure and keep going with the rest.
            print(f" ❌ Error: {exc}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
test_artist_search()
|
||||||
10
src/tests/musicbrainz_cleaner.py
Normal file
10
src/tests/musicbrainz_cleaner.py
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
#!/usr/bin/env python3
"""
MusicBrainz Data Cleaner - Entry Point

Simple entry point that imports from the refactored src structure
and exits with the status returned by ``src.cli.main.main``.
"""

import sys
from pathlib import Path

# This script lives in src/tests/, so the directory that contains the `src`
# package is two levels above this file's directory. When the script is run
# directly (python3 src/tests/musicbrainz_cleaner.py) Python only puts
# src/tests/ on sys.path, which is not enough to import `src.cli.main`.
_PROJECT_ROOT = str(Path(__file__).resolve().parents[2])
if _PROJECT_ROOT not in sys.path:
    sys.path.insert(0, _PROJECT_ROOT)

from src.cli.main import main

if __name__ == "__main__":
    # Use sys.exit rather than the bare `exit` helper: `exit` is injected by
    # the `site` module and is missing when the interpreter runs with -S.
    sys.exit(main())
|
||||||
42
src/tests/process_full_dataset.py
Normal file
42
src/tests/process_full_dataset.py
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Legacy script for full dataset processing.
|
||||||
|
This script now redirects to the new CLI command for better functionality.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import subprocess
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
def main():
    """Forward this legacy invocation to the new CLI through docker-compose.

    Prints the equivalent CLI command, then runs
    ``docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main
    --process-full-dataset`` with any extra command-line arguments appended.
    The command is executed from the directory containing this script.

    Exits the process with status 1 when the command returns a non-zero
    status or when ``subprocess.run`` raises ``FileNotFoundError``.
    """
    print("🔄 Redirecting to new CLI command...")
    print("📝 Use: docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --process-full-dataset")
    print()

    # Replacement command; extra arguments given to this script are passed
    # through unchanged (an empty slice adds nothing).
    command = [
        "docker-compose", "run", "--rm", "musicbrainz-cleaner",
        "python3", "-m", "src.cli.main", "--process-full-dataset",
    ] + sys.argv[1:]

    # Run from the directory this script is stored in.
    working_dir = Path(__file__).parent

    try:
        subprocess.run(command, cwd=working_dir, check=True)
    except FileNotFoundError:
        print("❌ Error: docker-compose not found. Make sure Docker is running.")
        print("💡 Alternative: Run directly with:")
        print(" python3 -m src.cli.main --process-full-dataset")
        sys.exit(1)
    except subprocess.CalledProcessError as failure:
        print(f"❌ Error running command: {failure}")
        sys.exit(1)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
108
src/tests/quick_test_20.py
Normal file
108
src/tests/quick_test_20.py
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Quick test script for 20 random songs
|
||||||
|
Simple single-threaded approach
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Add the src directory to the path
|
||||||
|
sys.path.insert(0, '/app')
|
||||||
|
from src.cli.main import MusicBrainzCleaner
|
||||||
|
|
||||||
|
def main(sample_size=20):
    """Run the cleaner over a random sample of songs and print a summary.

    Loads ``data/songs.json`` (relative to the current working directory),
    draws a random sample, passes each song to
    ``MusicBrainzCleaner.clean_song`` and prints how many artists and
    recordings were resolved, followed by the list of failed songs.

    Args:
        sample_size: Number of songs to sample. Defaults to 20 and is clamped
            to the number of songs available, so a small dataset no longer
            makes ``random.sample`` raise ``ValueError``.

    Returns:
        None. All results are printed to stdout.
    """
    import random

    print(f'🚀 Starting quick test with {sample_size} random songs...')

    # Load songs
    input_file = Path('data/songs.json')
    if not input_file.exists():
        print('❌ songs.json not found')
        return

    # Explicit encoding: artist/title text is not guaranteed to be ASCII and
    # the platform default encoding varies.
    with open(input_file, 'r', encoding='utf-8') as f:
        all_songs = json.load(f)

    print(f'📊 Total songs available: {len(all_songs):,}')

    # Never ask for more songs than exist.
    total = max(0, min(sample_size, len(all_songs)))
    if total == 0:
        print('❌ No songs available to test')
        return

    sample_songs = random.sample(all_songs, total)
    print(f'🎯 Testing {total} random songs...')

    # Initialize cleaner
    cleaner = MusicBrainzCleaner()

    # Process songs
    found_artists = 0
    found_recordings = 0
    failed_songs = []
    width = len(str(total))  # keeps the progress counter aligned

    start_time = time.time()

    for i, song in enumerate(sample_songs, 1):
        print(f' [{i:{width}d}/{total}] Processing: "{song.get("artist", "Unknown")}" - "{song.get("title", "Unknown")}"')

        try:
            result = cleaner.clean_song(song)

            # The sibling test scripts unpack clean_song's result as
            # (song_dict, success). Testing `'mbid' in result` on that tuple
            # is always False, so look inside the song dict instead. A plain
            # dict result is still accepted.
            cleaned_song = result[0] if isinstance(result, tuple) else result

            artist_found = 'mbid' in cleaned_song
            recording_found = 'recording_mbid' in cleaned_song

            # Count each kind of match on its own so the two rates can differ.
            if artist_found:
                found_artists += 1
            if recording_found:
                found_recordings += 1

            if artist_found and recording_found:
                print(f' ✅ Found both artist and recording')
            else:
                failed_songs.append({
                    'original': song,
                    'cleaned': cleaned_song,
                    'artist_found': artist_found,
                    'recording_found': recording_found,
                    'artist_name': song.get('artist', 'Unknown'),
                    'title': song.get('title', 'Unknown'),
                })
                print(f' ❌ Artist: {artist_found}, Recording: {recording_found}')

        except Exception as e:
            # Best-effort run: record the failure and continue with the rest.
            print(f' 💥 Error: {e}')
            failed_songs.append({
                'original': song,
                'cleaned': {'error': str(e)},
                'artist_found': False,
                'recording_found': False,
                'artist_name': song.get('artist', 'Unknown'),
                'title': song.get('title', 'Unknown'),
                'error': str(e),
            })

    processing_time = time.time() - start_time

    # Calculate success rates
    artist_success_rate = found_artists / total * 100
    recording_success_rate = found_recordings / total * 100
    failed_rate = len(failed_songs) / total * 100
    # Guard against a zero elapsed time on coarse clocks.
    speed_text = f'{total / processing_time:.1f}' if processing_time > 0 else 'n/a'

    print(f'\n📊 Final Results:')
    print(f' ⏱️ Processing time: {processing_time:.2f} seconds')
    print(f' 🚀 Speed: {speed_text} songs/second')
    print(f' ✅ Artists found: {found_artists}/{total} ({artist_success_rate:.1f}%)')
    print(f' ✅ Recordings found: {found_recordings}/{total} ({recording_success_rate:.1f}%)')
    print(f' ❌ Failed songs: {len(failed_songs)} ({failed_rate:.1f}%)')

    # Show failed songs
    if failed_songs:
        print(f'\n🔍 Failed songs:')
        for i, failed in enumerate(failed_songs, 1):
            print(f' [{i}] "{failed["artist_name"]}" - "{failed["title"]}"')
            print(f' Artist found: {failed["artist_found"]}, Recording found: {failed["recording_found"]}')
            if 'error' in failed:
                print(f' Error: {failed["error"]}')
    else:
        print('\n🎉 All songs processed successfully!')
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
||||||
133
src/tests/test_100_random.py
Normal file
133
src/tests/test_100_random.py
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Test script for 100 random songs
|
||||||
|
Simple single-threaded approach
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Add the src directory to the path
|
||||||
|
sys.path.insert(0, '/app')
|
||||||
|
from src.cli.main import MusicBrainzCleaner
|
||||||
|
|
||||||
|
def main(sample_size=100):
    """Run the cleaner over a random sample of songs and report the results.

    Loads ``data/songs.json`` (relative to the current working directory),
    draws a random sample, passes each song to
    ``MusicBrainzCleaner.clean_song`` and prints a summary. When any song
    fails, a detailed report is written to ``data/test_100_results.json``
    and the first ten failures are printed.

    Args:
        sample_size: Number of songs to sample. Defaults to 100 and is
            clamped to the number of songs available, so a small dataset no
            longer makes ``random.sample`` raise ``ValueError``.

    Returns:
        None. Results are printed to stdout and, on failures, written to the
        report file.
    """
    import random

    print(f'🚀 Starting test with {sample_size} random songs...')

    # Load songs
    input_file = Path('data/songs.json')
    if not input_file.exists():
        print('❌ songs.json not found')
        return

    # Explicit encoding: artist/title text is not guaranteed to be ASCII and
    # the platform default encoding varies.
    with open(input_file, 'r', encoding='utf-8') as f:
        all_songs = json.load(f)

    print(f'📊 Total songs available: {len(all_songs):,}')

    # Never ask for more songs than exist.
    total = max(0, min(sample_size, len(all_songs)))
    if total == 0:
        print('❌ No songs available to test')
        return

    sample_songs = random.sample(all_songs, total)
    print(f'🎯 Testing {total} random songs...')

    # Initialize cleaner
    cleaner = MusicBrainzCleaner()

    # Process songs
    found_artists = 0
    found_recordings = 0
    failed_songs = []
    width = len(str(total))  # keeps the progress counter aligned

    start_time = time.time()

    for i, song in enumerate(sample_songs, 1):
        print(f' [{i:{width}d}/{total}] Processing: "{song.get("artist", "Unknown")}" - "{song.get("title", "Unknown")}"')

        try:
            result = cleaner.clean_song(song)

            # Extract the cleaned song from the tuple (song_dict, success_boolean)
            cleaned_song, success = result

            artist_found = 'mbid' in cleaned_song
            recording_found = 'recording_mbid' in cleaned_song

            # Count each kind of match on its own so the two rates can differ.
            if artist_found:
                found_artists += 1
            if recording_found:
                found_recordings += 1

            if artist_found and recording_found:
                print(f' ✅ Found both artist and recording')
            else:
                failed_songs.append({
                    'original': song,
                    'cleaned': cleaned_song,
                    'success': success,
                    'artist_found': artist_found,
                    'recording_found': recording_found,
                    'artist_name': song.get('artist', 'Unknown'),
                    'title': song.get('title', 'Unknown'),
                })
                print(f' ❌ Artist: {artist_found}, Recording: {recording_found}')

        except Exception as e:
            # Best-effort run: record the failure and continue with the rest.
            print(f' 💥 Error: {e}')
            failed_songs.append({
                'original': song,
                'cleaned': {'error': str(e)},
                'artist_found': False,
                'recording_found': False,
                'artist_name': song.get('artist', 'Unknown'),
                'title': song.get('title', 'Unknown'),
                'error': str(e),
            })

    processing_time = time.time() - start_time

    # Calculate success rates
    artist_success_rate = found_artists / total * 100
    recording_success_rate = found_recordings / total * 100
    failed_rate = len(failed_songs) / total * 100
    # Guard against a zero elapsed time on coarse clocks; None becomes JSON null.
    songs_per_second = total / processing_time if processing_time > 0 else None
    speed_text = f'{songs_per_second:.1f}' if songs_per_second is not None else 'n/a'

    print(f'\n📊 Final Results:')
    print(f' ⏱️ Processing time: {processing_time:.2f} seconds')
    print(f' 🚀 Speed: {speed_text} songs/second')
    print(f' ✅ Artists found: {found_artists}/{total} ({artist_success_rate:.1f}%)')
    print(f' ✅ Recordings found: {found_recordings}/{total} ({recording_success_rate:.1f}%)')
    print(f' ❌ Failed songs: {len(failed_songs)} ({failed_rate:.1f}%)')

    # Save detailed report
    if failed_songs:
        report_file = 'data/test_100_results.json'
        report_data = {
            'test_summary': {
                'total_tested': total,
                'artists_found': found_artists,
                'recordings_found': found_recordings,
                'failed_count': len(failed_songs),
                'artist_success_rate': artist_success_rate,
                'recording_success_rate': recording_success_rate,
                'processing_time_seconds': processing_time,
                'songs_per_second': songs_per_second,
            },
            'failed_songs': failed_songs,
        }

        with open(report_file, 'w', encoding='utf-8') as f:
            json.dump(report_data, f, indent=2)

        print(f'\n📄 Detailed report saved to: {report_file}')

        # Show examples of failed songs
        print(f'\n🔍 Examples of failed songs:')
        for i, failed in enumerate(failed_songs[:10], 1):
            print(f' [{i}] "{failed["artist_name"]}" - "{failed["title"]}"')
            print(f' Artist found: {failed["artist_found"]}, Recording found: {failed["recording_found"]}')
            if 'error' in failed:
                print(f' Error: {failed["error"]}')
    else:
        print('\n🎉 All songs processed successfully!')
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
||||||
43
src/tests/test_collaboration_debug.py
Normal file
43
src/tests/test_collaboration_debug.py
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Debug script for collaboration parsing issues
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0, '/app')
|
||||||
|
|
||||||
|
from src.api.database import MusicBrainzDatabase
|
||||||
|
|
||||||
|
def test_collaboration_parsing():
    """Print how a fixed set of artist strings is split into collaborators.

    Each string is passed to
    ``MusicBrainzDatabase._parse_complex_collaboration``; the result is
    unpacked as ``(main_artist, collaborators)`` and printed, along with
    whether any collaborators were returned. An exception raised for one
    input is printed and the loop moves on to the next input.
    """
    database = MusicBrainzDatabase()

    # Artist strings taken from songs that previously failed.
    inputs = (
        "ft Jamie Foxx West, Kanye",
        "Ariana Grande, Normani, Nicki Minaj",
        "SZA, Justin Bieber",
        "Will.I.Am ft. Britney Spears",  # This should work now
        "Florida Georgia Line Ft Luke Bryan",  # This should work now
    )

    print("🔍 Testing Collaboration Parsing")
    print("=" * 50)

    for raw_artist in inputs:
        print(f"\n📝 Input: '{raw_artist}'")

        try:
            lead, featured = database._parse_complex_collaboration(raw_artist)
            print(f" ✅ Main artist: '{lead}'")
            print(f" ✅ Collaborators: {featured}")

            # A non-empty collaborator collection means a collaboration was detected.
            print(
                f" 🎯 Parsed as collaboration"
                if featured
                else f" ❌ Not detected as collaboration"
            )
        except Exception as exc:
            # Debug script: report the failure and keep going with the rest.
            print(f" ❌ Error: {exc}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
test_collaboration_parsing()
|
||||||
62
src/tests/test_failed_collaborations.py
Normal file
62
src/tests/test_failed_collaborations.py
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Test script for the specific failed collaboration songs
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Add the src directory to the path
|
||||||
|
sys.path.insert(0, '/app')
|
||||||
|
from src.cli.main import MusicBrainzCleaner
|
||||||
|
|
||||||
|
def main():
    """Re-run the cleaner on three collaboration songs that previously failed.

    Each song dict is passed to ``MusicBrainzCleaner.clean_song``, whose
    result is unpacked as ``(cleaned_song, success)``. The script then prints
    whether the cleaned song carries an artist ``mbid`` and a
    ``recording_mbid``. An exception raised for one song is printed and the
    loop moves on to the next song.
    """
    print('🔍 Testing Failed Collaboration Songs...')

    # Songs rebuilt from the earlier failures.
    songs = [
        {"artist": "ft Jamie Foxx West, Kanye", "title": "Gold Digger"},
        {"artist": "Ariana Grande, Normani, Nicki Minaj", "title": "Bad To You"},
        {"artist": "SZA, Justin Bieber", "title": "Snooze (Acoustic)"},
    ]

    # Initialize cleaner
    cleaner = MusicBrainzCleaner()

    song_count = len(songs)
    print(f'🎯 Testing {song_count} collaboration songs...')

    for position, song in enumerate(songs, 1):
        print(f'\n [{position}/{song_count}] Processing: "{song["artist"]}" - "{song["title"]}"')

        try:
            cleaned, _success = cleaner.clean_song(song)

            has_artist = 'mbid' in cleaned
            has_recording = 'recording_mbid' in cleaned

            def show_artist():
                # Shared by the "both found" and "artist only" outcomes.
                print(f' 🎯 Artist: {cleaned.get("artist", "Unknown")} (MBID: {cleaned.get("mbid", "None")})')

            if not has_artist:
                print(f' ❌ Neither artist nor recording found')
            elif has_recording:
                print(f' ✅ Found both artist and recording')
                show_artist()
                print(f' 🎯 Recording: {cleaned.get("recording_mbid", "None")}')
            else:
                print(f' ⚠️ Artist found but recording not found')
                show_artist()
        except Exception as exc:
            # Debug script: report the failure and keep going with the rest.
            print(f' ❌ Error: {exc}')
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
48
src/tests/test_simple_query.py
Normal file
48
src/tests/test_simple_query.py
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Simple database query test
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0, '/app')
|
||||||
|
|
||||||
|
from src.api.database import MusicBrainzDatabase
|
||||||
|
|
||||||
|
def test_simple_query():
    """Connect to the database and run a few sanity-check queries.

    Prints the total row count of the ``artist`` table, then looks up
    'Ariana Grande' and 'SZA' by exact name and prints each name and ``gid``.
    Returns early when ``connect()`` reports failure. Any exception raised
    while querying is printed together with its traceback.

    NOTE(review): rows are read by column name (``row['count']``,
    ``row['name']``), so the cursor is presumably dict-style — confirm
    against ``MusicBrainzDatabase``.
    """
    database = MusicBrainzDatabase()

    connected = database.connect()
    if not connected:
        print("❌ Could not connect to database")
        return

    print("✅ Connected to database")

    def fetch_one(sql, *params):
        # Run one statement on the shared cursor and return its first row.
        database.cursor.execute(sql, *params)
        return database.cursor.fetchone()

    try:
        # Test a simple query
        total_row = fetch_one("SELECT COUNT(*) FROM artist")
        print(f"📊 Total artists in database: {total_row['count']:,}")

        # Test specific artist query
        row = fetch_one("SELECT name, gid FROM artist WHERE name = %s LIMIT 1", ('Ariana Grande',))
        if not row:
            print("❌ Ariana Grande not found")
        else:
            print(f"✅ Found Ariana Grande: {row['name']} (MBID: {row['gid']})")

        # Test SZA
        row = fetch_one("SELECT name, gid FROM artist WHERE name = %s LIMIT 1", ('SZA',))
        if not row:
            print("❌ SZA not found")
        else:
            print(f"✅ Found SZA: {row['name']} (MBID: {row['gid']})")

    except Exception as exc:
        print(f"❌ Error: {exc}")
        import traceback
        traceback.print_exc()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
test_simple_query()
|
||||||
50
src/tests/test_title_cleaning.py
Normal file
50
src/tests/test_title_cleaning.py
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Test script for improved title cleaning
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0, '/app')
|
||||||
|
|
||||||
|
from src.api.database import MusicBrainzDatabase
|
||||||
|
|
||||||
|
def test_title_cleaning():
    """Print the title variations generated for a fixed list of song titles.

    Each title is passed to
    ``MusicBrainzDatabase._generate_title_variations`` and the returned
    variations are printed as a numbered list. An exception raised for one
    title is printed and the loop moves on to the next title.
    """
    database = MusicBrainzDatabase()

    # Titles taken from songs that previously failed.
    sample_titles = (
        "Do I Wanna Know? - Live At the BBC",
        "All The Small Things (John Lewis Christmas Ad 2022)",
        "I Don t F k With You",
        "Por Mujeres Como Tu",
        "Thought You Should Know (Without Backing Vocals)",
        "It Might Be You (from the movie Tootsie)",
        "Speedy Gonzales (Boone & Speedy Vocals)",
        "I'm Telling You Now (Two Semitones Down)",
        "The ELO Medley 1",
        "Can't Fight This Feeling (Minus Piano)",
        "The Look Of Love",
        "Revolution (Without Backing Vocals)",
        "Right Here, Right Now (My Heart Belongs to You)",
        "Hush Hush",
        "On The Floor",
        "(I've Had) The Time Of My Life",
    )

    print("🔍 Testing Improved Title Cleaning")
    print("=" * 50)

    for raw_title in sample_titles:
        print(f"\n📝 Original: '{raw_title}'")

        try:
            candidates = database._generate_title_variations(raw_title)
            print(f" 🧹 Cleaned variations ({len(candidates)}):")
            for number, candidate in enumerate(candidates, 1):
                print(f" {number}. '{candidate}'")
        except Exception as exc:
            # Debug script: report the failure and keep going with the rest.
            print(f" ❌ Error: {exc}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
test_title_cleaning()
|
||||||
Loading…
Reference in New Issue
Block a user