musicbrainz-cleaner/src/tests/unit/test_fuzzy_eazy_e.py

78 lines
2.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Test the fuzzy search specifically for Eazy-E.
"""
import psycopg2
import psycopg2.extras
from fuzzywuzzy import fuzz
# Database configuration.
# These values are passed straight to psycopg2.connect() in
# test_fuzzy_eazy_e(); nothing in this file reads them from the environment.
# NOTE(review): 172.18.0.2 looks like a container-internal bridge address --
# confirm it is reachable from wherever this script is run.
# NOTE(review): credentials are hard-coded; presumably throwaway local/dev
# values -- verify before reusing this script against any shared database.
DB_HOST = "172.18.0.2"
DB_PORT = 5432
DB_NAME = "musicbrainz_db"
DB_USER = "musicbrainz"
DB_PASSWORD = "musicbrainz"
# Dash-like code points reported when inspecting artist names.
# The non-ASCII literals are written as escapes on purpose: they are visually
# indistinguishable from "-" and are easily lost or normalised by editors and
# web viewers.
# NOTE(review): the exact set originally listed here could not be recovered;
# this is a reconstruction covering the common hyphen/dash look-alikes.
_DASH_VARIANTS = frozenset({
    "-",       # U+002D HYPHEN-MINUS
    "\u2010",  # HYPHEN
    "\u2011",  # NON-BREAKING HYPHEN
    "\u2012",  # FIGURE DASH
    "\u2013",  # EN DASH
    "\u2014",  # EM DASH
    "\u2212",  # MINUS SIGN
})


def test_fuzzy_eazy_e():
    """Test fuzzy search for Eazy-E.

    Connects to the database described by the module-level ``DB_*``
    constants and prints three diagnostic sections:

    1. Up to 20 artists whose name contains "Eazy" or "Sleazy"
       (case-insensitive), each with its ``fuzz.ratio`` score against
       "Eazy-E".
    2. The highest-scoring candidate, or a notice when the query returned
       no rows.
    3. Up to 10 artists matching ``'%Eazy%E%'``, with the position and code
       point of every dash-like character in each name, to expose names that
       use a typographic hyphen instead of ASCII ``-``.

    Returns:
        None. Results are printed; nothing is asserted.

    Any exception (connection failure, query error, ...) is caught, printed
    together with its traceback, and not re-raised. The cursor and the
    connection are released whether or not an error occurred.
    """
    print("🔍 Testing fuzzy search for Eazy-E...")
    target = 'Eazy-E'.lower()
    connection = None  # lets the finally-clause tell whether connect() succeeded
    try:
        connection = psycopg2.connect(
            host=DB_HOST,
            port=DB_PORT,
            database=DB_NAME,
            user=DB_USER,
            password=DB_PASSWORD,
            connect_timeout=10,
        )
        # RealDictCursor returns rows addressable by column name.
        # The with-block closes the cursor even if a query raises.
        with connection.cursor(
            cursor_factory=psycopg2.extras.RealDictCursor
        ) as cursor:
            print("✅ Connected to database")

            # Get all artists that might match Eazy-E
            print("\n1. Getting potential matches for 'Eazy-E'...")
            cursor.execute(
                "SELECT name, gid FROM artist WHERE name ILIKE '%Eazy%' OR name ILIKE '%Sleazy%' LIMIT 20"
            )
            artists = cursor.fetchall()
            print(f" Found {len(artists)} potential matches:")

            # Score every candidate once; the pairs feed both the listing
            # and the best-match selection below.
            scored = [
                (fuzz.ratio(target, artist['name'].lower()), artist)
                for artist in artists
            ]
            for score, artist in scored:
                print(f" {artist['name']} (MBID: {artist['gid']}) - Score: {score}")

            # Find the best match. max() returns the first of several equal
            # maxima, so ties go to the earliest row.
            if scored:
                best_score, best_match = max(scored, key=lambda pair: pair[0])
                print(f"\n2. Best match: {best_match['name']} (Score: {best_score})")
            else:
                # Without this guard an empty result would abort the run
                # before the dash check in step 3.
                print("\n2. Best match: none (no candidates found)")

            # Check if there's an Eazy-E with different dash
            print("\n3. Checking for Eazy-E with different dash character...")
            cursor.execute(
                "SELECT name, gid FROM artist WHERE name LIKE '%Eazy%E%' LIMIT 10"
            )
            eazy_variants = cursor.fetchall()
            print(f" Found {len(eazy_variants)} Eazy-E variants:")
            for artist in eazy_variants:
                print(f" '{artist['name']}' (MBID: {artist['gid']})")
                # Show the exact characters
                for i, char in enumerate(artist['name']):
                    if char in _DASH_VARIANTS:
                        print(f" Character {i}: '{char}' (ord: {ord(char)})")
    except Exception as e:
        # Diagnostic script: report the failure instead of propagating it.
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
    finally:
        # Closed explicitly so the connection is released on error paths too.
        if connection is not None:
            connection.close()
# Run the diagnostic when this file is executed directly as a script.
if __name__ == "__main__":
    test_fuzzy_eazy_e()