Compare commits
21 Commits
dc396a346f
...
7d60d7fc47
| Author | SHA1 | Date | |
|---|---|---|---|
| 7d60d7fc47 | |||
| f053471a76 | |||
| 2b17f2b57a | |||
| fee61a12bc | |||
| f127bac480 | |||
| b75418eade | |||
| 543e7b71d4 | |||
| ec33253403 | |||
| b794d9dc1c | |||
| 0f33590eca | |||
| 4e45ef0280 | |||
| 4bbd03eab7 | |||
| 3d8b0165af | |||
| a57687d10c | |||
| a538bcb7f5 | |||
| c9221a35b3 | |||
| 9124640bf4 | |||
| ddbc6a9ebc | |||
| 4bf359ee5d | |||
| 504820c8a1 | |||
| eeeb0bfd64 |
452
COMMANDS.md
452
COMMANDS.md
@ -2,55 +2,86 @@
|
||||
|
||||
## Overview
|
||||
|
||||
The MusicBrainz Data Cleaner is a command-line interface (CLI) tool that processes JSON song data files and cleans/normalizes the metadata using the MusicBrainz database.
|
||||
The MusicBrainz Data Cleaner is a command-line interface (CLI) tool that processes JSON song data files and cleans/normalizes the metadata using the MusicBrainz database. The tool uses an interface-based architecture with dependency injection for clean, maintainable code. It creates separate output files for successful and failed songs, along with detailed processing reports.
|
||||
|
||||
## Basic Command Structure
|
||||
|
||||
```bash
|
||||
python musicbrainz_cleaner.py <input_file> [output_file] [options]
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main [options]
|
||||
```
|
||||
|
||||
## Command Arguments
|
||||
## Command Options
|
||||
|
||||
### Required Arguments
|
||||
### Main Options
|
||||
|
||||
| Argument | Type | Description | Example |
|
||||
|----------|------|-------------|---------|
|
||||
| `input_file` | string | Path to the JSON file containing song data | `my_songs.json` |
|
||||
|
||||
### Optional Arguments
|
||||
|
||||
| Argument | Type | Description | Example |
|
||||
|----------|------|-------------|---------|
|
||||
| `output_file` | string | Path for the cleaned output file | `cleaned_songs.json` |
|
||||
| `--help` | flag | Show help information | `--help` |
|
||||
| `--version` | flag | Show version information | `--version` |
|
||||
| Option | Type | Description | Default | Example |
|
||||
|--------|------|-------------|---------|---------|
|
||||
| `--source` | string | Source JSON file path | `data/songs.json` | `--source data/my_songs.json` |
|
||||
| `--output-success` | string | Output file for successful songs | `source-success.json` | `--output-success cleaned.json` |
|
||||
| `--output-failure` | string | Output file for failed songs | `source-failure.json` | `--output-failure failed.json` |
|
||||
| `--limit` | number | Process only first N songs | None (all songs) | `--limit 1000` |
|
||||
| `--use-api` | flag | Force use of HTTP API instead of database | Database mode | `--use-api` |
|
||||
| `--test-connection` | flag | Test connection to MusicBrainz server | None | `--test-connection` |
|
||||
| `--help` | flag | Show help information | None | `--help` |
|
||||
| `--version` | flag | Show version information | None | `--version` |
|
||||
|
||||
## Command Examples
|
||||
|
||||
### Basic Usage
|
||||
### Basic Usage (Default)
|
||||
|
||||
```bash
|
||||
# Clean songs and save to auto-generated filename
|
||||
python musicbrainz_cleaner.py songs.json
|
||||
# Output: songs_cleaned.json
|
||||
# Process all songs with default settings
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main
|
||||
# Output: data/songs-success.json and data/songs-failure.json
|
||||
```
|
||||
|
||||
### Custom Output File
|
||||
### Custom Source File
|
||||
|
||||
```bash
|
||||
# Specify custom output filename
|
||||
python musicbrainz_cleaner.py songs.json cleaned_songs.json
|
||||
# Process specific file
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --source data/my_songs.json
|
||||
# Output: data/my_songs-success.json and data/my_songs-failure.json
|
||||
```
|
||||
|
||||
### Custom Output Files
|
||||
|
||||
```bash
|
||||
# Specify custom output files
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --source data/songs.json --output-success cleaned.json --output-failure failed.json
|
||||
```
|
||||
|
||||
### Limited Processing
|
||||
|
||||
```bash
|
||||
# Process only first 1000 songs
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --limit 1000
|
||||
```
|
||||
|
||||
### Force API Mode
|
||||
|
||||
```bash
|
||||
# Use HTTP API instead of database (slower but works without PostgreSQL)
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --use-api
|
||||
```
|
||||
|
||||
### Test Connection
|
||||
|
||||
```bash
|
||||
# Test database connection
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --test-connection
|
||||
|
||||
# Test API connection
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --test-connection --use-api
|
||||
```
|
||||
|
||||
### Help and Information
|
||||
|
||||
```bash
|
||||
# Show help information
|
||||
python musicbrainz_cleaner.py --help
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --help
|
||||
|
||||
# Show version information
|
||||
python musicbrainz_cleaner.py --version
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --version
|
||||
```
|
||||
|
||||
## Input File Format
|
||||
@ -84,9 +115,13 @@ Any additional fields will be preserved in the output:
|
||||
- `path`: File path
|
||||
- Any other custom fields
|
||||
|
||||
## Output File Format
|
||||
## Output Files
|
||||
|
||||
The output file will contain the same structure with cleaned data and added MBID fields:
|
||||
The tool creates **three output files**:
|
||||
|
||||
### 1. Successful Songs (`source-success.json`)
|
||||
|
||||
Array of successfully processed songs with MBIDs added:
|
||||
|
||||
```json
|
||||
[
|
||||
@ -103,49 +138,107 @@ The output file will contain the same structure with cleaned data and added MBID
|
||||
]
|
||||
```
|
||||
|
||||
### Added Fields
|
||||
### 2. Failed Songs (`source-failure.json`)
|
||||
|
||||
Array of songs that couldn't be processed (same format as source):
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"artist": "Unknown Artist",
|
||||
"title": "Unknown Song",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "12345678-1234-1234-1234-123456789012",
|
||||
"path": "z://MP4\\Unknown Artist - Unknown Song.mp4"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
### 3. Processing Report (`processing_report_YYYYMMDD_HHMMSS.txt`)
|
||||
|
||||
Human-readable text report with statistics and failed song list:
|
||||
|
||||
```
|
||||
MusicBrainz Data Cleaner - Processing Report
|
||||
==================================================
|
||||
|
||||
Source File: data/songs.json
|
||||
Processing Date: 2024-12-19 14:30:22
|
||||
Processing Time: 15263.3 seconds
|
||||
|
||||
SUMMARY
|
||||
--------------------
|
||||
Total Songs Processed: 49,170
|
||||
Successful Songs: 40,692
|
||||
Failed Songs: 8,478
|
||||
Success Rate: 82.8%
|
||||
|
||||
DETAILED STATISTICS
|
||||
--------------------
|
||||
Artists Found: 44,526/49,170 (90.6%)
|
||||
Recordings Found: 40,998/49,170 (83.4%)
|
||||
Processing Speed: 3.2 songs/second
|
||||
|
||||
OUTPUT FILES
|
||||
--------------------
|
||||
Successful Songs: data/songs-success.json
|
||||
Failed Songs: data/songs-failure.json
|
||||
Report File: data/processing_report_20241219_143022.txt
|
||||
|
||||
FAILED SONGS (First 50)
|
||||
--------------------
|
||||
1. Unknown Artist - Unknown Song
|
||||
2. Invalid Artist - Invalid Title
|
||||
3. Test Artist - Test Song
|
||||
...
|
||||
```
|
||||
|
||||
### Added Fields (Successful Songs Only)
|
||||
|
||||
- `mbid`: MusicBrainz Artist ID (string)
|
||||
- `recording_mbid`: MusicBrainz Recording ID (string)
|
||||
|
||||
## Command Line Options
|
||||
## Processing Output
|
||||
|
||||
### Help Option
|
||||
### Progress Indicators
|
||||
|
||||
```bash
|
||||
python musicbrainz_cleaner.py --help
|
||||
```
|
||||
🚀 Starting song processing...
|
||||
📊 Total songs to process: 49,170
|
||||
Using database connection
|
||||
==================================================
|
||||
|
||||
[1 of 49,170] ✅ PASS: ACDC - Shot In The Dark
|
||||
[2 of 49,170] ❌ FAIL: Unknown Artist - Unknown Song
|
||||
[3 of 49,170] ✅ PASS: Bruno Mars feat. Cardi B - Finesse (remix)
|
||||
|
||||
📈 Progress: 100/49,170 (0.2%) - Success: 85.0% - Rate: 3.2 songs/sec
|
||||
|
||||
==================================================
|
||||
🎉 Processing completed!
|
||||
📊 Final Results:
|
||||
⏱️ Total processing time: 15263.3 seconds
|
||||
🚀 Average speed: 3.2 songs/second
|
||||
✅ Artists found: 44,526/49,170 (90.6%)
|
||||
✅ Recordings found: 40,998/49,170 (83.4%)
|
||||
❌ Failed songs: 8,478 (17.2%)
|
||||
📄 Files saved:
|
||||
✅ Successful songs: data/songs-success.json
|
||||
❌ Failed songs: data/songs-failure.json
|
||||
📋 Text report: data/processing_report_20241219_143022.txt
|
||||
📊 JSON report: data/processing_report_20241219_143022.json
|
||||
```
|
||||
|
||||
**Output:**
|
||||
```
|
||||
Usage: python musicbrainz_cleaner.py <input_file.json> [output_file.json]
|
||||
### Status Indicators
|
||||
|
||||
MusicBrainz Data Cleaner - Clean and normalize song data using MusicBrainz
|
||||
|
||||
Arguments:
|
||||
input_file.json JSON file containing array of song objects
|
||||
output_file.json Optional: Output file for cleaned data
|
||||
(default: input_file_cleaned.json)
|
||||
|
||||
Examples:
|
||||
python musicbrainz_cleaner.py songs.json
|
||||
python musicbrainz_cleaner.py songs.json cleaned_songs.json
|
||||
|
||||
Requirements:
|
||||
- MusicBrainz server running on http://localhost:5001
|
||||
- Python 3.6+ with requests library
|
||||
```
|
||||
|
||||
### Version Option
|
||||
|
||||
```bash
|
||||
python musicbrainz_cleaner.py --version
|
||||
```
|
||||
|
||||
**Output:**
|
||||
```
|
||||
MusicBrainz Data Cleaner v1.0.0
|
||||
```
|
||||
| Symbol | Meaning | Description |
|
||||
|--------|---------|-------------|
|
||||
| ✅ | Success | Song processed successfully with MBIDs found |
|
||||
| ❌ | Failure | Song processing failed (no MBIDs found) |
|
||||
| 📈 | Progress | Progress update with statistics |
|
||||
| 🚀 | Start | Processing started |
|
||||
| 🎉 | Complete | Processing completed successfully |
|
||||
|
||||
## Error Messages and Exit Codes
|
||||
|
||||
@ -161,7 +254,7 @@ MusicBrainz Data Cleaner v1.0.0
|
||||
|
||||
#### File Not Found
|
||||
```
|
||||
Error: File 'songs.json' not found
|
||||
Error: Source file does not exist: data/songs.json
|
||||
```
|
||||
|
||||
#### Invalid JSON
|
||||
@ -171,12 +264,12 @@ Error: Invalid JSON in file 'songs.json'
|
||||
|
||||
#### Invalid Input Format
|
||||
```
|
||||
Error: Input file should contain a JSON array of songs
|
||||
Error: Source file should contain a JSON array of songs
|
||||
```
|
||||
|
||||
#### Connection Error
|
||||
```
|
||||
Error searching for artist 'Artist Name': Connection refused
|
||||
❌ Connection to MusicBrainz database failed
|
||||
```
|
||||
|
||||
#### Missing Dependencies
|
||||
@ -184,112 +277,113 @@ Error searching for artist 'Artist Name': Connection refused
|
||||
ModuleNotFoundError: No module named 'requests'
|
||||
```
|
||||
|
||||
## Processing Output
|
||||
## Architecture Overview
|
||||
|
||||
### Progress Indicators
|
||||
### Interface-Based Design
|
||||
|
||||
```
|
||||
Processing 3 songs...
|
||||
==================================================
|
||||
The tool uses a clean interface-based architecture:
|
||||
|
||||
[1/3] Processing: ACDC - Shot In The Dark
|
||||
✅ Found artist: AC/DC (MBID: 66c662b6-6e2f-4930-8610-912e24c63ed1)
|
||||
✅ Found recording: Shot in the Dark (MBID: cf8b5cd0-d97c-413d-882f-fc422a2e57db)
|
||||
✅ Updated to: AC/DC - Shot in the Dark
|
||||
- **`MusicBrainzDataProvider` Interface**: Common protocol for data access
|
||||
- **`DataProviderFactory`**: Creates appropriate provider (database or API)
|
||||
- **`SongProcessor`**: Centralized processing logic using the interface
|
||||
- **Dependency Injection**: CLI depends on interfaces, not concrete classes
|
||||
|
||||
[2/3] Processing: Bruno Mars ft. Cardi B - Finesse Remix
|
||||
❌ Could not find artist: Bruno Mars ft. Cardi B
|
||||
### Data Flow
|
||||
|
||||
[3/3] Processing: Taylor Swift - Love Story
|
||||
✅ Found artist: Taylor Swift (MBID: 20244d07-534f-4eff-b4d4-930878889970)
|
||||
✅ Found recording: Love Story (MBID: d783e6c5-761f-4fc3-bfcf-6089cdfc8f96)
|
||||
✅ Updated to: Taylor Swift - Love Story
|
||||
1. **CLI** uses `DataProviderFactory` to create data provider
|
||||
2. **Factory** returns either database or API implementation
|
||||
3. **SongProcessor** processes songs using the common interface
|
||||
4. **Same logic** works regardless of provider type
|
||||
|
||||
==================================================
|
||||
✅ Processing complete!
|
||||
📁 Output saved to: songs_cleaned.json
|
||||
```
|
||||
## Environment Configuration
|
||||
|
||||
### Status Indicators
|
||||
### Docker Environment
|
||||
|
||||
| Symbol | Meaning | Description |
|
||||
|--------|---------|-------------|
|
||||
| ✅ | Success | Operation completed successfully |
|
||||
| ❌ | Error | Operation failed |
|
||||
| 🔄 | Processing | Currently processing |
|
||||
|
||||
## Batch Processing
|
||||
|
||||
### Multiple Files
|
||||
|
||||
To process multiple files, you can use shell scripting:
|
||||
|
||||
```bash
|
||||
# Process all JSON files in current directory
|
||||
for file in *.json; do
|
||||
python musicbrainz_cleaner.py "$file"
|
||||
done
|
||||
```
|
||||
|
||||
### Large Files
|
||||
|
||||
For large files, the tool processes songs one at a time with a 0.1-second delay between API calls to be respectful to the MusicBrainz server.
|
||||
|
||||
## Environment Variables
|
||||
|
||||
The tool uses the following default configuration:
|
||||
The tool runs in a Docker container with the following configuration:
|
||||
|
||||
| Setting | Default | Description |
|
||||
|---------|---------|-------------|
|
||||
| MusicBrainz URL | `http://localhost:5001` | Local MusicBrainz server URL |
|
||||
| API Delay | `0.1` seconds | Delay between API calls |
|
||||
| Database Host | `db` | PostgreSQL database container |
|
||||
| Database Port | `5432` | PostgreSQL port |
|
||||
| Database Name | `musicbrainz_db` | MusicBrainz database name |
|
||||
| API URL | `http://localhost:5001` | MusicBrainz web server URL |
|
||||
|
||||
### Environment Variables
|
||||
|
||||
```bash
|
||||
# Database configuration
|
||||
DB_HOST=db
|
||||
DB_PORT=5432
|
||||
DB_NAME=musicbrainz_db
|
||||
DB_USER=musicbrainz
|
||||
DB_PASSWORD=musicbrainz
|
||||
|
||||
# Web server configuration
|
||||
MUSICBRAINZ_WEB_SERVER_PORT=5001
|
||||
```
|
||||
|
||||
## Troubleshooting Commands
|
||||
|
||||
### Check MusicBrainz Server Status
|
||||
|
||||
```bash
|
||||
# Test if server is running
|
||||
# Test if web server is running
|
||||
curl -I http://localhost:5001
|
||||
|
||||
# Test API endpoint
|
||||
curl "http://localhost:5001/ws/2/artist/?query=name:AC/DC&fmt=json"
|
||||
# Test database connection
|
||||
docker-compose exec db psql -U musicbrainz -d musicbrainz_db -c "SELECT COUNT(*) FROM artist;"
|
||||
```
|
||||
|
||||
### Validate JSON File
|
||||
|
||||
```bash
|
||||
# Check if JSON is valid
|
||||
python -m json.tool songs.json
|
||||
python -m json.tool data/songs.json
|
||||
|
||||
# Check JSON structure
|
||||
python -c "import json; data=json.load(open('songs.json')); print('Valid JSON array with', len(data), 'items')"
|
||||
python -c "import json; data=json.load(open('data/songs.json')); print('Valid JSON array with', len(data), 'items')"
|
||||
```
|
||||
|
||||
### Check Python Dependencies
|
||||
### Test Tool Connection
|
||||
|
||||
```bash
|
||||
# Check if requests is installed
|
||||
python -c "import requests; print('requests version:', requests.__version__)"
|
||||
# Test database connection
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --test-connection
|
||||
|
||||
# Install if missing
|
||||
pip install requests
|
||||
# Test API connection
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --test-connection --use-api
|
||||
```
|
||||
|
||||
## Advanced Usage
|
||||
|
||||
### Custom MusicBrainz Server
|
||||
### Batch Processing
|
||||
|
||||
To use a different MusicBrainz server, modify the script:
|
||||
To process multiple files, you can use shell scripting:
|
||||
|
||||
```python
|
||||
# In musicbrainz_cleaner.py, change:
|
||||
self.base_url = "http://your-server:5001"
|
||||
```bash
|
||||
# Process all JSON files in data directory
|
||||
for file in data/*.json; do
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --source "$file"
|
||||
done
|
||||
```
|
||||
|
||||
### Verbose Output
|
||||
### Large Files
|
||||
|
||||
For debugging, you can modify the script to add more verbose output by uncommenting debug print statements.
|
||||
For large files, the tool processes songs efficiently with:
|
||||
- Direct database access for maximum speed
|
||||
- Progress tracking every 100 songs
|
||||
- Memory-efficient processing
|
||||
- No rate limiting with database access
|
||||
|
||||
### Custom Processing
|
||||
|
||||
```bash
|
||||
# Process only the first 1000 songs (useful for testing)
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --source data/songs.json --limit 1000
|
||||
|
||||
# Process with custom output files
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --source data/songs.json --output-success my_cleaned.json --output-failure my_failed.json
|
||||
```
|
||||
|
||||
## Command Line Shortcuts
|
||||
|
||||
@ -299,18 +393,22 @@ Add these to your shell profile for convenience:
|
||||
|
||||
```bash
|
||||
# Add to ~/.bashrc or ~/.zshrc
|
||||
alias mbclean='python musicbrainz_cleaner.py'
|
||||
alias mbclean-help='python musicbrainz_cleaner.py --help'
|
||||
alias mbclean='docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main'
|
||||
alias mbclean-help='docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --help'
|
||||
alias mbclean-test='docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --test-connection'
|
||||
```
|
||||
|
||||
### Usage with Aliases
|
||||
|
||||
```bash
|
||||
# Using alias
|
||||
mbclean songs.json
|
||||
mbclean --source data/songs.json
|
||||
|
||||
# Show help
|
||||
mbclean-help
|
||||
|
||||
# Test connection
|
||||
mbclean-test
|
||||
```
|
||||
|
||||
## Integration Examples
|
||||
@ -319,8 +417,8 @@ mbclean-help
|
||||
|
||||
```bash
|
||||
# Process files and commit changes
|
||||
python musicbrainz_cleaner.py songs.json
|
||||
git add songs_cleaned.json
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --source data/songs.json
|
||||
git add data/songs-success.json data/songs-failure.json
|
||||
git commit -m "Clean song metadata with MusicBrainz IDs"
|
||||
```
|
||||
|
||||
@ -328,7 +426,7 @@ git commit -m "Clean song metadata with MusicBrainz IDs"
|
||||
|
||||
```bash
|
||||
# Add to crontab to process files daily
|
||||
0 2 * * * cd /path/to/musicbrainz-cleaner && python musicbrainz_cleaner.py /path/to/songs.json
|
||||
0 2 * * * cd /path/to/musicbrainz-cleaner && docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --source /path/to/songs.json
|
||||
```
|
||||
|
||||
### With Shell Scripts
|
||||
@ -337,13 +435,18 @@ git commit -m "Clean song metadata with MusicBrainz IDs"
|
||||
#!/bin/bash
|
||||
# clean_songs.sh
|
||||
INPUT_FILE="$1"
|
||||
OUTPUT_FILE="${INPUT_FILE%.json}_cleaned.json"
|
||||
OUTPUT_SUCCESS="${INPUT_FILE%.json}-success.json"
|
||||
OUTPUT_FAILURE="${INPUT_FILE%.json}-failure.json"
|
||||
|
||||
python musicbrainz_cleaner.py "$INPUT_FILE" "$OUTPUT_FILE"
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main \
|
||||
--source "$INPUT_FILE" \
|
||||
--output-success "$OUTPUT_SUCCESS" \
|
||||
--output-failure "$OUTPUT_FAILURE"
|
||||
|
||||
if [ $? -eq 0 ]; then
|
||||
echo "Successfully cleaned $INPUT_FILE"
|
||||
echo "Output saved to $OUTPUT_FILE"
|
||||
echo "Successfully processed $INPUT_FILE"
|
||||
echo "Successful songs: $OUTPUT_SUCCESS"
|
||||
echo "Failed songs: $OUTPUT_FAILURE"
|
||||
else
|
||||
echo "Error processing $INPUT_FILE"
|
||||
exit 1
|
||||
@ -354,7 +457,72 @@ fi
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `python musicbrainz_cleaner.py file.json` | Basic usage |
|
||||
| `python musicbrainz_cleaner.py file.json output.json` | Custom output |
|
||||
| `python musicbrainz_cleaner.py --help` | Show help |
|
||||
| `python musicbrainz_cleaner.py --version` | Show version |
|
||||
| `docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main` | Process all songs with defaults |
|
||||
| `docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --source file.json` | Process specific file |
|
||||
| `docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --limit 1000` | Process first 1000 songs |
|
||||
| `docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --test-connection` | Test database connection |
|
||||
| `docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --use-api` | Force API mode |
|
||||
| `docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --help` | Show help |
|
||||
| `docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --version` | Show version |
|
||||
|
||||
## Artist Lookup System Commands
|
||||
|
||||
The MusicBrainz Data Cleaner includes an advanced Artist Lookup System with its own CLI for managing artist data.
|
||||
|
||||
### Artist Lookup CLI Structure
|
||||
|
||||
```bash
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.artist_lookup_cli [command] [options]
|
||||
```
|
||||
|
||||
### Available Commands
|
||||
|
||||
#### Search for Artists
|
||||
```bash
|
||||
# Search for an artist in the lookup table
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.artist_lookup_cli search "Queen"
|
||||
|
||||
# Search with custom similarity threshold (0.0 to 1.0)
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.artist_lookup_cli search "Destiny's Child" --min-score 0.8
|
||||
```
|
||||
|
||||
#### View Statistics
|
||||
```bash
|
||||
# Show lookup table statistics
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.artist_lookup_cli stats
|
||||
```
|
||||
|
||||
#### List All Artists
|
||||
```bash
|
||||
# List all artists in the lookup table
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.artist_lookup_cli list
|
||||
```
|
||||
|
||||
#### Add New Artists
|
||||
```bash
|
||||
# Add a new artist with variations
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.artist_lookup_cli add \
|
||||
--canonical-name "New Artist" \
|
||||
--mbid "12345678-1234-1234-1234-123456789abc" \
|
||||
--variations "Artist, The Artist, Artist Band" \
|
||||
--notes "Description of the artist"
|
||||
```
|
||||
|
||||
### Artist Lookup Command Reference
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.artist_lookup_cli search "Artist Name"` | Search for artist with fuzzy matching |
|
||||
| `docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.artist_lookup_cli search "Artist Name" --min-score 0.8` | Search with custom similarity threshold |
|
||||
| `docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.artist_lookup_cli stats` | Show lookup table statistics |
|
||||
| `docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.artist_lookup_cli list` | List all artists in lookup table |
|
||||
| `docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.artist_lookup_cli add --canonical-name "Name" --mbid "MBID" --variations "var1, var2"` | Add new artist to lookup table |
|
||||
|
||||
### Artist Lookup Features
|
||||
|
||||
- **2,446+ Artists**: Comprehensive lookup table
|
||||
- **4,950+ Variations**: Extensive name variations and aliases
|
||||
- **Fuzzy Matching**: Intelligent matching with configurable thresholds
|
||||
- **Canonical Names**: Consistent artist name replacement
|
||||
- **Automatic Integration**: Works seamlessly with song processing
|
||||
- **CLI Management**: Full command-line interface for data management
|
||||
353
PRD.md
353
PRD.md
@ -4,9 +4,55 @@
|
||||
## Project Overview
|
||||
|
||||
**Product Name:** MusicBrainz Data Cleaner
|
||||
**Version:** 3.0.0
|
||||
**Date:** December 19, 2024
|
||||
**Status:** Production Ready with Advanced Database Integration ✅
|
||||
**Version:** 3.1.0
|
||||
**Date:** August 4, 2024
|
||||
**Status:** Production Ready with Advanced Artist Lookup System ✅
|
||||
|
||||
## 🚀 Quick Start for New Sessions
|
||||
|
||||
**For new chat sessions or after system reboots, follow this exact sequence:**
|
||||
|
||||
### 1. Start MusicBrainz Services
|
||||
```bash
|
||||
# Quick restart (recommended)
|
||||
./restart_services.sh
|
||||
|
||||
# Or full restart (if you have issues)
|
||||
./start_services.sh
|
||||
```
|
||||
|
||||
### 2. Wait for Services to Initialize
|
||||
- **Database**: 5-10 minutes to fully load
|
||||
- **Web server**: 2-3 minutes to start responding
|
||||
- **Check status**: `cd ../musicbrainz-docker && docker-compose ps`
|
||||
|
||||
### 3. Verify Services Are Ready
|
||||
```bash
|
||||
# Test web server
|
||||
curl -s http://localhost:5001 | head -5
|
||||
|
||||
# Test database (should show 2.6M+ artists)
|
||||
docker-compose exec db psql -U musicbrainz -d musicbrainz_db -c "SELECT COUNT(*) FROM artist;"
|
||||
|
||||
# Test cleaner connection
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -c "from src.api.database import MusicBrainzDatabase; db = MusicBrainzDatabase(); print('Connection result:', db.connect())"
|
||||
```
|
||||
|
||||
### 4. Run the Cleaner
|
||||
```bash
|
||||
# Process all songs with default settings
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main
|
||||
|
||||
# Process with custom options
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --source data/my_songs.json --limit 1000
|
||||
|
||||
# Test connection
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --test-connection
|
||||
```
|
||||
|
||||
**⚠️ Critical**: Always run scripts via Docker - the cleaner cannot connect to the database directly from outside the container.
|
||||
|
||||
**📋 Troubleshooting**: See `TROUBLESHOOTING.md` for common issues and solutions.
|
||||
|
||||
## Problem Statement
|
||||
|
||||
@ -19,6 +65,9 @@ Users have song data in JSON format with inconsistent artist names, song titles,
|
||||
- **NEW**: Use fuzzy search for better matching of similar names
|
||||
- **NEW**: Handle artist aliases and name variations (e.g., "98 Degrees" → "98°")
|
||||
- **NEW**: Distinguish between band names and collaborations (e.g., "Simon & Garfunkel" vs "Lovato, Demi & Joe Jonas")
|
||||
- **NEW**: Advanced Artist Lookup System with 2,446+ artists and 4,950+ variations
|
||||
- **NEW**: Fallback lookup table for artists not found in database
|
||||
- **NEW**: Canonical name replacement for consistent artist naming
|
||||
|
||||
## Target Users
|
||||
|
||||
@ -76,10 +125,15 @@ Users have song data in JSON format with inconsistent artist names, song titles,
|
||||
|
||||
#### 6. CLI Interface
|
||||
- **REQ-034:** Command-line interface with argument parsing
|
||||
- **REQ-035:** Support for input and optional output file specification
|
||||
- **REQ-036:** Progress reporting during processing
|
||||
- **REQ-035:** Support for source file specification with smart defaults
|
||||
- **REQ-036:** Progress reporting during processing with song counter
|
||||
- **REQ-037:** Error handling and user-friendly messages
|
||||
- **REQ-038:** Option to force API mode with `--use-api` flag
|
||||
- **NEW REQ-039:** Simplified CLI with default full dataset processing
|
||||
- **NEW REQ-040:** Separate output files for successful and failed songs (array format)
|
||||
- **NEW REQ-041:** Human-readable text report with statistics
|
||||
- **NEW REQ-042:** Configurable processing limits and output file paths
|
||||
- **NEW REQ-043:** Smart defaults for all file paths and options
|
||||
|
||||
### ✅ Non-Functional Requirements
|
||||
|
||||
@ -111,6 +165,7 @@ Users have song data in JSON format with inconsistent artist names, song titles,
|
||||
- **Primary:** Direct PostgreSQL database access
|
||||
- **Fallback:** MusicBrainz REST API (local server)
|
||||
- **Interface:** Command-line (CLI)
|
||||
- **Design Pattern:** Interface-based architecture with dependency injection
|
||||
|
||||
### Project Structure
|
||||
```
|
||||
@ -118,16 +173,27 @@ src/
|
||||
├── __init__.py # Package initialization
|
||||
├── api/ # API-related modules
|
||||
│ ├── __init__.py
|
||||
│ ├── database.py # Direct PostgreSQL access with fuzzy search
|
||||
│ └── api_client.py # Legacy HTTP API client (fallback)
|
||||
│ ├── database.py # Direct PostgreSQL access (implements MusicBrainzDataProvider)
|
||||
│ └── api_client.py # HTTP API client (implements MusicBrainzDataProvider)
|
||||
├── cli/ # Command-line interface
|
||||
│ ├── __init__.py
|
||||
│ └── main.py # Main CLI implementation
|
||||
│ └── main.py # Main CLI implementation (uses factory pattern)
|
||||
├── config/ # Configuration
|
||||
│ ├── __init__.py
|
||||
│ └── constants.py # Constants and settings
|
||||
├── core/ # Core functionality
|
||||
├── utils/ # Utility functions
|
||||
│ ├── __init__.py
|
||||
│ ├── interfaces.py # Common interfaces and protocols
|
||||
│ ├── factory.py # Data provider factory
|
||||
│ └── song_processor.py # Centralized song processing logic
|
||||
├── tests/ # Test files and scripts
|
||||
│ ├── __init__.py
|
||||
│ ├── test_*.py # Unit and integration tests
|
||||
│ └── debug_*.py # Debug scripts
|
||||
└── utils/ # Utility functions
|
||||
├── __init__.py
|
||||
├── artist_title_processing.py # Shared artist/title processing
|
||||
└── data_loader.py # Data loading utilities
|
||||
```
|
||||
|
||||
### Architectural Principles
|
||||
@ -138,12 +204,18 @@ src/
|
||||
- **Error Handling**: Graceful error handling with meaningful messages
|
||||
- **Performance First**: Direct database access for maximum speed
|
||||
- **Fallback Strategy**: Automatic fallback to API when database unavailable
|
||||
- **NEW**: **Database-First**: Always use live database data over static caches
|
||||
- **NEW**: **Intelligent Collaboration Detection**: Distinguish band names from collaborations
|
||||
- **Interface-Based Design**: Uses dependency injection with common interfaces
|
||||
- **Factory Pattern**: Clean provider creation and configuration
|
||||
- **Single Responsibility**: Each class has one clear purpose
|
||||
- **Database-First**: Always use live database data over static caches
|
||||
- **Intelligent Collaboration Detection**: Distinguish band names from collaborations
|
||||
- **Test Organization**: All test files must be placed in `src/tests/` directory, not in root
|
||||
|
||||
### Data Flow
|
||||
1. Read JSON input file
|
||||
2. For each song:
|
||||
1. **CLI** uses `DataProviderFactory` to create appropriate data provider (database or API)
|
||||
2. **SongProcessor** receives the data provider and processes songs using the common interface
|
||||
3. **Data Provider** (database or API) implements the same interface for consistent behavior
|
||||
4. For each song:
|
||||
- Clean artist name using name variations
|
||||
- Detect collaboration patterns
|
||||
- Use fuzzy search to find artist in database (including aliases, sort_names)
|
||||
@ -151,7 +223,7 @@ src/
|
||||
- For collaborations: find artist_credit and recording
|
||||
- For single artists: find recording by artist and title
|
||||
- Update song object with corrected data and MBIDs
|
||||
3. Write cleaned data to output file
|
||||
5. Write cleaned data to output file
|
||||
|
||||
### Fuzzy Search Implementation
|
||||
- **Algorithm**: Uses fuzzywuzzy library with multiple matching strategies
|
||||
@ -167,7 +239,7 @@ src/
|
||||
### Collaboration Detection Logic
|
||||
- **Primary Patterns**: "ft.", "feat.", "featuring" (always collaborations)
|
||||
- **Secondary Patterns**: "&", "and", "," (intelligent detection)
|
||||
- **Band Name Protection**: Hardcoded list of obvious band names
|
||||
- **Band Name Protection**: 200+ known band names loaded from `data/known_artists.json`
|
||||
- **Comma Detection**: Parts with commas are likely collaborations
|
||||
- **Word Count Analysis**: Single-word parts separated by "&" might be band names
|
||||
- **Case Insensitivity**: All pattern matching is case-insensitive
|
||||
@ -181,6 +253,23 @@ src/
|
||||
- Manual configuration needed for custom artist/recording mappings
|
||||
- **NEW**: Some edge cases may require manual intervention (data quality issues)
|
||||
|
||||
### Test File Organization - CRITICAL DIRECTIVE
|
||||
- **REQUIRED**: All test files MUST be placed in `src/tests/` directory
|
||||
- **PROHIBITED**: Test files should NEVER be placed in the root directory
|
||||
- **Naming Convention**: Test files should follow `test_*.py` or `debug_*.py` patterns
|
||||
- **Purpose**: Keeps root directory clean and organizes test code properly
|
||||
- **Import Path**: Tests can import from parent modules using relative imports
|
||||
|
||||
**⚠️ CRITICAL ENFORCEMENT**: This directive is ABSOLUTE and NON-NEGOTIABLE. Any test files created in the root directory will be immediately removed from the root and relocated to `src/tests/`.
|
||||
|
||||
### Using Tests for Issue Resolution
|
||||
- **FIRST STEP**: When encountering issues, check `src/tests/` directory for existing test files
|
||||
- **EXISTING TESTS**: Many common issues already have test cases that can help debug problems
|
||||
- **DEBUG SCRIPTS**: Look for `debug_*.py` files that may contain troubleshooting code
|
||||
- **SPECIFIC TESTS**: Search for test files related to the specific functionality having issues
|
||||
- **EXAMPLES**: Test files often contain working examples of how to use the functionality
|
||||
- **PATTERNS**: Existing tests show the correct patterns for database queries, API calls, and data processing
|
||||
|
||||
## Server Setup Requirements
|
||||
|
||||
### MusicBrainz Server Configuration
|
||||
@ -245,13 +334,23 @@ docker-compose logs -f musicbrainz
|
||||
- [x] Fuzzy search for artists and recordings
|
||||
- [x] Automatic fallback to API mode
|
||||
- [x] Performance optimizations
|
||||
- [x] **NEW**: Advanced collaboration detection and handling
|
||||
- [x] **NEW**: Artist alias and sort_name search
|
||||
- [x] **NEW**: Dash variation handling
|
||||
- [x] **NEW**: Numerical suffix handling
|
||||
- [x] **NEW**: Band name vs collaboration distinction
|
||||
- [x] **NEW**: Complex collaboration parsing
|
||||
- [x] **NEW**: Removed problematic known_artists cache
|
||||
- [x] Advanced collaboration detection and handling
|
||||
- [x] Artist alias and sort_name search
|
||||
- [x] Dash variation handling
|
||||
- [x] Numerical suffix handling
|
||||
- [x] Band name vs collaboration distinction
|
||||
- [x] Complex collaboration parsing
|
||||
- [x] Removed problematic known_artists cache
|
||||
- [x] Simplified CLI with default full dataset processing
|
||||
- [x] Separate output files for successful and failed songs (array format)
|
||||
- [x] Human-readable text reports with statistics
|
||||
- [x] Smart defaults for all file paths and options
|
||||
- [x] Configurable processing limits and output file paths
|
||||
- [x] **NEW**: Interface-based architecture with dependency injection
|
||||
- [x] **NEW**: Factory pattern for data provider creation
|
||||
- [x] **NEW**: Centralized song processing logic
|
||||
- [x] **NEW**: Common interfaces for database and API clients
|
||||
- [x] **NEW**: Clean separation of concerns
|
||||
|
||||
### 🔄 Future Enhancements
|
||||
- [ ] Web interface option
|
||||
@ -325,7 +424,7 @@ docker-compose logs -f musicbrainz
|
||||
- Database connection configuration
|
||||
- Fuzzy search similarity thresholds
|
||||
- **NEW**: Collaboration detection patterns
|
||||
- **NEW**: Band name protection list
|
||||
- **NEW**: Band name protection list (JSON configuration)
|
||||
|
||||
## Security Considerations
|
||||
|
||||
@ -353,14 +452,17 @@ pip install -r requirements.txt
|
||||
|
||||
### Usage
|
||||
```bash
|
||||
# Use database access (recommended, faster)
|
||||
python musicbrainz_cleaner.py input.json
|
||||
# Process all songs with default settings (recommended)
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main
|
||||
|
||||
# Process specific file with custom options
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --source data/my_songs.json --limit 1000
|
||||
|
||||
# Force API mode (slower, fallback)
|
||||
python musicbrainz_cleaner.py input.json --use-api
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --use-api
|
||||
|
||||
# Test connections
|
||||
python musicbrainz_cleaner.py --test-connection
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --test-connection
|
||||
```
|
||||
|
||||
## Maintenance
|
||||
@ -371,15 +473,119 @@ python musicbrainz_cleaner.py --test-connection
|
||||
- Update dependencies as needed
|
||||
- Monitor database performance
|
||||
- Update fuzzy search thresholds based on usage
|
||||
- **NEW**: Review and update band name protection list
|
||||
- **NEW**: Review and update band name protection list in `data/known_artists.json`
|
||||
- **NEW**: Monitor collaboration detection accuracy
|
||||
|
||||
### Operational Procedures
|
||||
|
||||
#### After System Reboot
|
||||
1. **Start Docker Desktop** (if auto-start not enabled)
|
||||
2. **Restart MusicBrainz services**:
|
||||
```bash
|
||||
cd musicbrainz-cleaner
|
||||
./restart_services.sh
|
||||
```
|
||||
3. **Wait for database initialization** (5-10 minutes)
|
||||
4. **Test connection**:
|
||||
```bash
|
||||
docker-compose run --rm musicbrainz-cleaner python3 src/tests/quick_test_20.py
|
||||
```
|
||||
|
||||
#### Service Management
|
||||
- **Start services**: `./start_services.sh` (full setup) or `./restart_services.sh` (quick restart)
|
||||
- **Stop services**: `cd ../musicbrainz-docker && docker-compose down`
|
||||
- **Check status**: `cd ../musicbrainz-docker && docker-compose ps`
|
||||
- **View logs**: `cd ../musicbrainz-docker && docker-compose logs -f`
|
||||
|
||||
#### Troubleshooting
|
||||
- **Port conflicts**: Use `MUSICBRAINZ_WEB_SERVER_PORT=5001` environment variable
|
||||
- **Container conflicts**: Run `docker-compose down` then restart
|
||||
- **Database issues**: Check logs with `docker-compose logs -f db`
|
||||
- **Memory issues**: Increase Docker Desktop memory allocation (8GB+ recommended)
|
||||
|
||||
#### Critical Startup Issues & Solutions
|
||||
|
||||
**Issue 1: Database Connection Refused**
|
||||
- **Symptoms**: Cleaner reports "Connection refused" when trying to connect to database
|
||||
- **Root Cause**: Database container not fully initialized or wrong host configuration
|
||||
- **Solution**:
|
||||
```bash
|
||||
# Check database status
|
||||
docker-compose logs db | tail -10
|
||||
|
||||
# Verify database is ready
|
||||
docker-compose exec db psql -U musicbrainz -d musicbrainz_db -c "SELECT COUNT(*) FROM artist;"
|
||||
```
|
||||
|
||||
**Issue 2: Wrong Database Host Configuration**
|
||||
- **Symptoms**: Cleaner tries to connect to `172.18.0.2` but fails
|
||||
- **Root Cause**: Hardcoded IP address in database connection
|
||||
- **Solution**: Use Docker service name `db` instead of IP address in `src/api/database.py`
|
||||
|
||||
**Issue 3: Test Script Logic Error**
|
||||
- **Symptoms**: Test shows 0% success rate despite finding artists
|
||||
- **Root Cause**: Test script checking `'mbid' in result` where `result` is a tuple
|
||||
- **Solution**: Extract song dictionary from tuple: `cleaned_song, success = result`
|
||||
|
||||
**Issue 4: Services Not Fully Initialized**
|
||||
- **Symptoms**: API returns empty results even though database has data
|
||||
- **Root Cause**: MusicBrainz web server still starting up
|
||||
- **Solution**: Wait for services to be fully ready and verify with health checks
|
||||
|
||||
### Support
|
||||
- GitHub issues for bug reports
|
||||
- Documentation updates
|
||||
- User feedback integration
|
||||
- Database connection troubleshooting guide
|
||||
- **NEW**: Collaboration detection troubleshooting guide
|
||||
- **NEW**: Test-based troubleshooting guide
|
||||
|
||||
### Troubleshooting with Tests
|
||||
When encountering issues, the `src/tests/` directory contains valuable resources:
|
||||
|
||||
#### **Step 1: Check for Existing Test Cases**
|
||||
```bash
|
||||
# List all available test files
|
||||
ls src/tests/
|
||||
|
||||
# Look for specific functionality tests
|
||||
ls src/tests/ | grep -i "collaboration"
|
||||
ls src/tests/ | grep -i "artist"
|
||||
ls src/tests/ | grep -i "database"
|
||||
```
|
||||
|
||||
#### **Step 2: Run Relevant Debug Scripts**
|
||||
```bash
|
||||
# Run debug scripts for specific issues
|
||||
python3 src/tests/debug_artist_search.py
|
||||
python3 src/tests/test_collaboration_debug.py
|
||||
python3 src/tests/test_failed_collaborations.py
|
||||
```
|
||||
|
||||
#### **Step 3: Use Test Files as Examples**
|
||||
- **Database Issues**: Check `test_simple_query.py` for database connection patterns
|
||||
- **Artist Search Issues**: Check `debug_artist_search.py` for search examples
|
||||
- **Collaboration Issues**: Check `test_failed_collaborations.py` for collaboration handling
|
||||
- **Title Cleaning Issues**: Check `test_title_cleaning.py` for title processing examples
|
||||
|
||||
#### **Step 4: Common Test Files by Issue Type**
|
||||
| Issue Type | Relevant Test Files |
|
||||
|------------|-------------------|
|
||||
| Database Connection | `test_simple_query.py`, `test_cli.py` |
|
||||
| Artist Search | `debug_artist_search.py`, `test_100_random.py` |
|
||||
| Collaboration Detection | `test_failed_collaborations.py`, `test_collaboration_debug.py` |
|
||||
| Title Processing | `test_title_cleaning.py` |
|
||||
| CLI Issues | `test_cli.py`, `quick_test_20.py` |
|
||||
| General Debugging | `debug_artist_search.py`, `test_100_random.py` |
|
||||
|
||||
#### **Step 5: Extract Working Code**
|
||||
Test files often contain working code snippets that can be adapted:
|
||||
- Database connection patterns
|
||||
- API call examples
|
||||
- Data processing logic
|
||||
- Error handling approaches
|
||||
|
||||
**⚠️ REMINDER**: All test files MUST be in `src/tests/` directory. NEVER create test files in the root directory.
|
||||
|
||||
## Lessons Learned
|
||||
|
||||
@ -405,4 +611,93 @@ python musicbrainz_cleaner.py --test-connection
|
||||
- **Remove static caches** for better accuracy
|
||||
- **Database-first approach** ensures live data
|
||||
- **Fuzzy search thresholds** need tuning for different datasets
|
||||
- **Connection pooling** would improve performance for large datasets
|
||||
- **Connection pooling** would improve performance for large datasets
|
||||
|
||||
### Operational Insights
|
||||
- **Docker Service Management**: MusicBrainz services require proper startup sequence and initialization time
|
||||
- **Port Conflicts**: Common on macOS, requiring automatic detection and resolution
|
||||
- **System Reboots**: Services need to be restarted after system reboots, but data persists in Docker volumes
|
||||
- **Resource Requirements**: MusicBrainz services require significant memory (8GB+ recommended) and disk space
|
||||
- **Platform Compatibility**: Apple Silicon (M1/M2) works but may show platform mismatch warnings
|
||||
- **Database Connection Issues**: Common startup problems include wrong host configuration and incomplete initialization
|
||||
- **Test Script Logic**: Critical to handle tuple return values from cleaner methods correctly
|
||||
|
||||
## CRITICAL PROJECT DIRECTIVE - TEST FILE ORGANIZATION
|
||||
|
||||
**⚠️ ABSOLUTE REQUIREMENT - NON-NEGOTIABLE**
|
||||
|
||||
### Test File Placement Rules
|
||||
- **REQUIRED**: ALL test files MUST be placed in `src/tests/` directory
|
||||
- **PROHIBITED**: Test files should NEVER be placed in the root directory
|
||||
- **ENFORCEMENT**: Any test files created in the root directory will be immediately removed from the root and relocated to `src/tests/`
|
||||
- **NON-NEGOTIABLE**: This directive is absolute and must be followed at all times
|
||||
|
||||
### Why This Matters
|
||||
- **Project Structure**: Keeps the root directory clean and organized
|
||||
- **Code Organization**: Groups all test-related code in one location
|
||||
- **Maintainability**: Makes it easier to find and manage test files
|
||||
- **Best Practices**: Follows standard Python project structure conventions
|
||||
|
||||
### Compliance Required
|
||||
- **ALL developers** must follow this directive
|
||||
- **ALL test files** must be in `src/tests/`
|
||||
- **NO EXCEPTIONS** to this rule
|
||||
- **IMMEDIATE CORRECTION** required for any violations
|
||||
|
||||
## Performance Optimizations
|
||||
|
||||
### Default Artist Sorting
|
||||
- **Enabled by default**: Songs are automatically sorted by artist name before processing
|
||||
- **Performance benefits**:
|
||||
- Better database query efficiency (similar artists processed together)
|
||||
- Improved caching behavior
|
||||
- Cleaner log output organization
|
||||
- **Optional disable**: Use `--no-sort` flag to preserve original order
|
||||
- **User experience**: Most users benefit from sorting, so it's the default
|
||||
|
||||
### Multiple Artist Candidate Search
|
||||
- **Intelligent artist selection**: Tries multiple artist candidates when first choice doesn't have the recording
|
||||
- **Recording-aware prioritization**: Artists with the specific recording are prioritized
|
||||
- **Fallback strategy**: Up to 5 different artist candidates are tried if needed
|
||||
- **Comprehensive search**: Searches names, aliases, and fuzzy matches
|
||||
|
||||
## Artist Lookup System
|
||||
|
||||
### Overview
|
||||
The MusicBrainz Data Cleaner now includes an advanced Artist Lookup System that provides fallback matching for artists not found in the primary database search. This system significantly improves artist matching success rates.
|
||||
|
||||
### Key Features
|
||||
- **2,446+ Artists**: Comprehensive lookup table with real and placeholder MBIDs
|
||||
- **4,950+ Variations**: Extensive name variations and aliases
|
||||
- **Fuzzy Matching**: Intelligent matching with configurable similarity thresholds
|
||||
- **Canonical Names**: Consistent artist name replacement across datasets
|
||||
- **Fallback System**: Secondary search when database lookup fails
|
||||
|
||||
### Data Structure
|
||||
```json
|
||||
{
|
||||
"artist_variations": {
|
||||
"Canonical Artist Name": {
|
||||
"mbid": "real-or-placeholder-mbid",
|
||||
"variations": [
|
||||
"Artist Name",
|
||||
"Artist Name Variation 1",
|
||||
"Artist Name Variation 2"
|
||||
],
|
||||
"notes": "Description or status"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Usage
|
||||
- **Automatic Integration**: Works seamlessly with existing song processing
|
||||
- **CLI Management**: Full command-line interface for managing lookup data
|
||||
- **Search Capabilities**: Find artists by name with fuzzy matching
|
||||
- **Statistics**: Comprehensive reporting on lookup table usage
|
||||
|
||||
### Benefits
|
||||
- **Improved Success Rates**: Higher artist matching percentages
|
||||
- **Consistent Naming**: Standardized artist names across datasets
|
||||
- **Easy Management**: Simple tools for adding and updating artist data
|
||||
- **Scalable**: Can be extended with additional artists and variations
|
||||
503
README.md
503
README.md
@ -1,9 +1,54 @@
|
||||
# 🎵 MusicBrainz Data Cleaner v3.0
|
||||
# 🎵 MusicBrainz Data Cleaner v3.1
|
||||
|
||||
A powerful command-line tool that cleans and normalizes your song data using the MusicBrainz database. **Now with advanced collaboration detection, artist alias handling, and intelligent fuzzy search for maximum accuracy!**
|
||||
A powerful command-line tool that cleans and normalizes your song data using the MusicBrainz database. **Now with interface-based architecture, advanced collaboration detection, artist alias handling, intelligent fuzzy search, and a comprehensive Artist Lookup System for maximum accuracy!**
|
||||
|
||||
## ✨ What's New in v3.0
|
||||
## 🚀 Quick Start for New Sessions
|
||||
|
||||
**If you're starting fresh or after a reboot, follow this exact sequence:**
|
||||
|
||||
### 1. Start MusicBrainz Services
|
||||
```bash
|
||||
# Quick restart (recommended)
|
||||
./restart_services.sh
|
||||
|
||||
# Or full restart (if you have issues)
|
||||
./start_services.sh
|
||||
```
|
||||
|
||||
### 2. Wait for Services to Initialize
|
||||
- **Database**: 5-10 minutes to fully load
|
||||
- **Web server**: 2-3 minutes to start responding
|
||||
- **Check status**: `cd ../musicbrainz-docker && docker-compose ps`
|
||||
|
||||
### 3. Verify Services Are Ready
|
||||
```bash
|
||||
# Test web server
|
||||
curl -s http://localhost:5001 | head -5
|
||||
|
||||
# Test database (should show 2.6M+ artists)
|
||||
docker-compose exec db psql -U musicbrainz -d musicbrainz_db -c "SELECT COUNT(*) FROM artist;"
|
||||
|
||||
# Test cleaner connection
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -c "from src.api.database import MusicBrainzDatabase; db = MusicBrainzDatabase(); print('Connection result:', db.connect())"
|
||||
```
|
||||
|
||||
### 4. Run Tests
|
||||
```bash
|
||||
# Test 100 random songs
|
||||
docker-compose run --rm musicbrainz-cleaner python3 src/tests/test_100_random.py
|
||||
|
||||
# Or other test scripts
|
||||
docker-compose run --rm musicbrainz-cleaner python3 [script_name].py
|
||||
```
|
||||
|
||||
**⚠️ Important**: Always run scripts via Docker - the cleaner cannot connect to the database directly from outside the container.
|
||||
|
||||
**📋 Troubleshooting**: See `TROUBLESHOOTING.md` for common issues and solutions.
|
||||
|
||||
## ✨ What's New in v3.1
|
||||
|
||||
- **🏗️ Interface-Based Architecture**: Clean dependency injection with common interfaces
|
||||
- **🏭 Factory Pattern**: Smart data provider creation and configuration
|
||||
- **🚀 Direct Database Access**: Connect directly to PostgreSQL for 10x faster performance
|
||||
- **🎯 Advanced Fuzzy Search**: Intelligent matching for similar artist names and song titles
|
||||
- **🔄 Automatic Fallback**: Falls back to API mode if database access fails
|
||||
@ -14,6 +59,9 @@ A powerful command-line tool that cleans and normalizes your song data using the
|
||||
- **🆕 Sort Names**: Handle "Last, First" formats like "Corby, Matt" → "Matt Corby"
|
||||
- **🆕 Edge Case Handling**: Support for artists with hyphens, exclamation marks, numbers, and special characters
|
||||
- **🆕 Band Name Protection**: Distinguish between band names (Simon & Garfunkel) and collaborations (Lovato, Demi & Joe Jonas)
|
||||
- **🆕 Artist Lookup System**: Comprehensive fallback system with 2,446+ artists and 4,950+ variations
|
||||
- **🆕 Canonical Name Replacement**: Consistent artist naming across datasets
|
||||
- **🆕 CLI Management Tools**: Full command-line interface for managing artist lookup data
|
||||
|
||||
## ✨ What It Does
|
||||
|
||||
@ -39,50 +87,207 @@ A powerful command-line tool that cleans and normalizes your song data using the
|
||||
|
||||
## 🚀 Quick Start
|
||||
|
||||
### 1. Install Dependencies
|
||||
### Option 1: Automated Setup (Recommended)
|
||||
|
||||
1. **Start MusicBrainz services**:
|
||||
```bash
|
||||
./start_services.sh
|
||||
```
|
||||
This script will:
|
||||
- Check for Docker and port conflicts
|
||||
- Start all MusicBrainz services
|
||||
- Wait for database initialization
|
||||
- Create environment configuration
|
||||
- Test the connection
|
||||
|
||||
2. **Run the cleaner**:
|
||||
```bash
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --source data/songs.json --output-success cleaned_songs.json
|
||||
```
|
||||
|
||||
### Option 2: Manual Setup
|
||||
|
||||
1. **Start MusicBrainz services manually**:
|
||||
```bash
|
||||
cd ../musicbrainz-docker
|
||||
MUSICBRAINZ_WEB_SERVER_PORT=5001 docker-compose up -d
|
||||
```
|
||||
Wait 5-10 minutes for database initialization.
|
||||
|
||||
2. **Create environment configuration**:
|
||||
```bash
|
||||
# Create .env file in musicbrainz-cleaner directory
|
||||
cat > .env << EOF
|
||||
DB_HOST=db
|
||||
DB_PORT=5432
|
||||
DB_NAME=musicbrainz_db
|
||||
DB_USER=musicbrainz
|
||||
DB_PASSWORD=musicbrainz
|
||||
MUSICBRAINZ_WEB_SERVER_PORT=5001
|
||||
EOF
|
||||
```
|
||||
|
||||
3. **Run the cleaner**:
|
||||
```bash
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --source data/songs.json --output-success cleaned_songs.json
|
||||
```
|
||||
|
||||
### For detailed setup instructions, see [SETUP.md](SETUP.md)
|
||||
|
||||
## 🎯 Artist Lookup System
|
||||
|
||||
The MusicBrainz Data Cleaner includes an advanced Artist Lookup System that provides fallback matching for artists not found in the primary database search.
|
||||
|
||||
### Features
|
||||
- **2,446+ Artists**: Comprehensive lookup table with real and placeholder MBIDs
|
||||
- **4,950+ Variations**: Extensive name variations and aliases
|
||||
- **Fuzzy Matching**: Intelligent matching with configurable similarity thresholds
|
||||
- **Canonical Names**: Consistent artist name replacement across datasets
|
||||
- **Automatic Integration**: Works seamlessly with existing song processing
|
||||
|
||||
### Usage Examples
|
||||
|
||||
#### Search for Artists
|
||||
```bash
|
||||
pip install requests psycopg2-binary fuzzywuzzy python-Levenshtein
|
||||
# Search for an artist in the lookup table
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.artist_lookup_cli search "Queen"
|
||||
|
||||
# Search with custom similarity threshold
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.artist_lookup_cli search "Destiny's Child" --min-score 0.8
|
||||
```
|
||||
|
||||
### 2. Set Up MusicBrainz Server
|
||||
|
||||
#### Option A: Docker (Recommended)
|
||||
#### View Statistics
|
||||
```bash
|
||||
# Clone MusicBrainz Docker repository
|
||||
git clone https://github.com/metabrainz/musicbrainz-docker.git
|
||||
cd musicbrainz-docker
|
||||
|
||||
# Update postgres.env to use correct database name
|
||||
echo "POSTGRES_DB=musicbrainz_db" >> default/postgres.env
|
||||
|
||||
# Start the server
|
||||
docker-compose up -d
|
||||
|
||||
# Wait for database to be ready (can take 10-15 minutes)
|
||||
docker-compose logs -f musicbrainz
|
||||
# Show lookup table statistics
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.artist_lookup_cli stats
|
||||
```
|
||||
|
||||
#### Option B: Manual Setup
|
||||
1. Install PostgreSQL 12+
|
||||
2. Create database: `createdb musicbrainz_db`
|
||||
3. Import MusicBrainz data dump
|
||||
4. Start MusicBrainz server on port 8080
|
||||
|
||||
### 3. Test Connection
|
||||
#### List All Artists
|
||||
```bash
|
||||
python musicbrainz_cleaner.py --test-connection
|
||||
# List all artists in the lookup table
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.artist_lookup_cli list
|
||||
```
|
||||
|
||||
### 4. Run the Cleaner
|
||||
#### Add New Artists
|
||||
```bash
|
||||
# Use database access (recommended, faster)
|
||||
python musicbrainz_cleaner.py your_songs.json
|
||||
|
||||
# Force API mode (slower, fallback)
|
||||
python musicbrainz_cleaner.py your_songs.json --use-api
|
||||
# Add a new artist with variations
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.artist_lookup_cli add \
|
||||
--canonical-name "New Artist" \
|
||||
--mbid "12345678-1234-1234-1234-123456789abc" \
|
||||
--variations "Artist, The Artist, Artist Band" \
|
||||
--notes "Description of the artist"
|
||||
```
|
||||
|
||||
That's it! Your cleaned data will be saved to `your_songs_cleaned.json`
|
||||
### Benefits
|
||||
- **Improved Success Rates**: Higher artist matching percentages
|
||||
- **Consistent Naming**: Standardized artist names across datasets
|
||||
- **Easy Management**: Simple tools for adding and updating artist data
|
||||
- **Scalable**: Can be extended with additional artists and variations
|
||||
|
||||
## 🔄 After System Reboot
|
||||
|
||||
After restarting your Mac, you'll need to restart the MusicBrainz services:
|
||||
|
||||
### Quick Restart (Recommended)
|
||||
```bash
|
||||
# If Docker Desktop is already running
|
||||
./restart_services.sh
|
||||
|
||||
# Or manually
|
||||
cd ../musicbrainz-docker && MUSICBRAINZ_WEB_SERVER_PORT=5001 docker-compose up -d
|
||||
```
|
||||
|
||||
### Full Restart (If you have issues)
|
||||
```bash
|
||||
# Complete setup including Docker checks
|
||||
./start_services.sh
|
||||
```
|
||||
|
||||
### Auto-start Setup (Optional)
|
||||
1. **Enable Docker Desktop auto-start**:
|
||||
- Open Docker Desktop
|
||||
- Go to Settings → General
|
||||
- Check "Start Docker Desktop when you log in"
|
||||
|
||||
2. **Then just run**: `./restart_services.sh` after each reboot
|
||||
|
||||
**Note**: Your data is preserved in Docker volumes, so you don't need to reconfigure anything after a reboot.
|
||||
|
||||
## 🚨 Common Startup Issues & Fixes
|
||||
|
||||
### Issue 1: Database Connection Refused
|
||||
**Problem**: Cleaner can't connect to database with error "Connection refused"
|
||||
**Root Cause**: Database container not fully initialized or wrong host configuration
|
||||
**Fix**:
|
||||
```bash
|
||||
# Wait for database to be ready (check logs)
|
||||
cd ../musicbrainz-docker && docker-compose logs db | tail -10
|
||||
|
||||
# Verify database is accepting connections
|
||||
docker-compose exec db psql -U musicbrainz -d musicbrainz_db -c "SELECT COUNT(*) FROM artist;"
|
||||
```
|
||||
|
||||
### Issue 2: Wrong Database Host Configuration
|
||||
**Problem**: Cleaner tries to connect to `172.18.0.2` but can't reach it
|
||||
**Root Cause**: Hardcoded IP address in database connection
|
||||
**Fix**: Use Docker service name `db` instead of IP address
|
||||
```python
|
||||
# In src/api/database.py, change:
|
||||
host='172.18.0.2' # ❌ Wrong
|
||||
host='db' # ✅ Correct
|
||||
```
|
||||
|
||||
### Issue 3: Test Script Logic Error
|
||||
**Problem**: Test shows 0% success rate despite finding artists
|
||||
**Root Cause**: Test script checking `'mbid' in result` where `result` is a tuple `(song_dict, success_boolean)`
|
||||
**Fix**: Extract song dictionary from tuple
|
||||
```python
|
||||
# Wrong:
|
||||
artist_found = 'mbid' in result
|
||||
|
||||
# Correct:
|
||||
cleaned_song, success = result
|
||||
artist_found = 'mbid' in cleaned_song
|
||||
```
|
||||
|
||||
### Issue 4: Services Not Fully Initialized
|
||||
**Problem**: API returns empty results even though database has data
|
||||
**Root Cause**: MusicBrainz web server still starting up
|
||||
**Fix**: Wait for services to be fully ready
|
||||
```bash
|
||||
# Check if web server is responding
|
||||
curl -s http://localhost:5001 | head -5
|
||||
|
||||
# Wait for database to be ready
|
||||
docker-compose logs db | grep "database system is ready"
|
||||
```
|
||||
|
||||
### Issue 5: Port Conflicts
|
||||
**Problem**: Port 5000 already in use
|
||||
**Root Cause**: Another service using the port
|
||||
**Fix**: Use alternative port
|
||||
```bash
|
||||
MUSICBRAINZ_WEB_SERVER_PORT=5001 docker-compose up -d
|
||||
```
|
||||
|
||||
### Issue 6: Container Name Conflicts
|
||||
**Problem**: "Container name already in use" error
|
||||
**Root Cause**: Previous containers not properly cleaned up
|
||||
**Fix**: Remove conflicting containers
|
||||
```bash
|
||||
docker-compose down
|
||||
docker rm -f <container_name>
|
||||
```
|
||||
|
||||
## 🔧 Startup Checklist
|
||||
|
||||
Before running tests, verify:
|
||||
1. ✅ Docker Desktop is running
|
||||
2. ✅ All containers are up: `docker-compose ps`
|
||||
3. ✅ Database is ready: `docker-compose logs db | grep "ready"`
|
||||
4. ✅ Web server responds: `curl -s http://localhost:5001`
|
||||
5. ✅ Database has data: `docker-compose exec db psql -U musicbrainz -d musicbrainz_db -c "SELECT COUNT(*) FROM artist;"`
|
||||
6. ✅ Cleaner can connect: `docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --test-connection`
|
||||
|
||||
## 📋 Requirements
|
||||
|
||||
@ -115,8 +320,13 @@ That's it! Your cleaned data will be saved to `your_songs_cleaned.json`
|
||||
|
||||
## 🧪 Testing
|
||||
|
||||
Run the test suite to verify everything works correctly:
|
||||
### Test File Organization
|
||||
- **REQUIRED**: All test files must be placed in `src/tests/` directory
|
||||
- **PROHIBITED**: Test files should not be placed in the root directory
|
||||
- **Naming Convention**: Test files should follow `test_*.py` or `debug_*.py` patterns
|
||||
- **Purpose**: Keeps root directory clean and organizes test code properly
|
||||
|
||||
### Running Tests
|
||||
```bash
|
||||
# Run all tests
|
||||
python3 src/tests/run_tests.py
|
||||
@ -138,7 +348,33 @@ python3 src/tests/run_tests.py --list
|
||||
- **Integration Tests**: Test interactions between components and database
|
||||
- **Debug Tests**: Debug scripts and troubleshooting tools
|
||||
|
||||
## 📁 Data Files
|
||||
## 📁 Project Structure
|
||||
|
||||
```
|
||||
musicbrainz-cleaner/
|
||||
├── src/
|
||||
│ ├── api/ # Database and API access
|
||||
│ │ ├── database.py # Direct PostgreSQL access (implements MusicBrainzDataProvider)
|
||||
│ │ └── api_client.py # HTTP API client (implements MusicBrainzDataProvider)
|
||||
│ ├── cli/ # Command-line interface
|
||||
│ │ └── main.py # Main CLI implementation (uses factory pattern)
|
||||
│ ├── config/ # Configuration and constants
|
||||
│ ├── core/ # Core functionality
|
||||
│ │ ├── interfaces.py # Common interfaces and protocols
|
||||
│ │ ├── factory.py # Data provider factory
|
||||
│ │ └── song_processor.py # Centralized song processing logic
|
||||
│ ├── tests/ # Test files (REQUIRED location)
|
||||
│ └── utils/ # Utility functions
|
||||
│ ├── artist_title_processing.py # Shared artist/title processing
|
||||
│ └── data_loader.py # Data loading utilities
|
||||
├── data/ # Data files and output
|
||||
│ ├── known_artists.json # Name variations (ACDC → AC/DC)
|
||||
│ ├── known_recordings.json # Known recording MBIDs
|
||||
│ └── songs.json # Source songs file
|
||||
└── docker-compose.yml # Docker configuration
|
||||
```
|
||||
|
||||
### Data Files
|
||||
|
||||
The tool uses external JSON files for name variations:
|
||||
|
||||
@ -160,7 +396,7 @@ These files can be easily updated without touching the code, making it simple to
|
||||
### ✅ Collaboration Detection
|
||||
- **Primary Patterns**: "ft.", "feat.", "featuring" (always collaborations)
|
||||
- **Secondary Patterns**: "&", "and", "," (intelligent detection)
|
||||
- **Band Name Protection**: "Simon & Garfunkel" (not collaboration)
|
||||
- **Band Name Protection**: 200+ known band names from `data/known_artists.json`
|
||||
- **Complex Collaborations**: "Pitbull ft. Ne-Yo, Afrojack & Nayer"
|
||||
- **Case Insensitive**: "Featuring" → "featuring"
|
||||
|
||||
@ -192,40 +428,61 @@ These files can be easily updated without touching the code, making it simple to
|
||||
- **Numbers**: "98 Degrees", "S Club 7", "3 Doors Down"
|
||||
- **Special Characters**: "a-ha", "The B-52s", "Salt-N-Pepa"
|
||||
|
||||
### 🆕 Simplified Processing
|
||||
- **Default Behavior**: Process all songs by default (no special flags needed)
|
||||
- **Separate Output Files**: Successful and failed songs saved to different files
|
||||
- **Progress Tracking**: Real-time progress with song counter and status
|
||||
- **Smart Defaults**: Sensible defaults for all file paths and options
|
||||
- **Detailed Reporting**: Comprehensive statistics and processing report
|
||||
- **Batch Processing**: Efficient handling of large song collections
|
||||
|
||||
## 📖 Usage Examples
|
||||
|
||||
### Basic Usage
|
||||
### Basic Usage (Default)
|
||||
```bash
|
||||
# Clean your songs and save to auto-generated filename
|
||||
python musicbrainz_cleaner.py my_songs.json
|
||||
# Output: my_songs_cleaned.json
|
||||
# Process all songs with default settings (data/songs.json)
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main
|
||||
# Output: data/songs-success.json and data/songs-failure.json
|
||||
```
|
||||
|
||||
### Custom Output File
|
||||
### Custom Source File
|
||||
```bash
|
||||
# Specify your own output filename
|
||||
python musicbrainz_cleaner.py my_songs.json cleaned_songs.json
|
||||
# Process specific file
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --source data/my_songs.json
|
||||
# Output: data/my_songs-success.json and data/my_songs-failure.json
|
||||
```
|
||||
|
||||
### Custom Output Files
|
||||
```bash
|
||||
# Specify custom output files
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --source data/songs.json --output-success cleaned.json --output-failure failed.json
|
||||
```
|
||||
|
||||
### Limit Processing
|
||||
```bash
|
||||
# Process only first 1000 songs
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --limit 1000
|
||||
```
|
||||
|
||||
### Force API Mode
|
||||
```bash
|
||||
# Use HTTP API instead of database (slower but works without PostgreSQL)
|
||||
python musicbrainz_cleaner.py my_songs.json --use-api
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --use-api
|
||||
```
|
||||
|
||||
### Test Connections
|
||||
```bash
|
||||
# Test database connection
|
||||
python musicbrainz_cleaner.py --test-connection
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --test-connection
|
||||
|
||||
# Test with API mode
|
||||
python musicbrainz_cleaner.py --test-connection --use-api
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --test-connection --use-api
|
||||
```
|
||||
|
||||
### Help
|
||||
```bash
|
||||
# Show usage information
|
||||
python musicbrainz_cleaner.py --help
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --help
|
||||
```
|
||||
|
||||
## 📁 Data Files
|
||||
@ -256,7 +513,10 @@ Your JSON file should contain an array of song objects:
|
||||
|
||||
## 📤 Output Format
|
||||
|
||||
The tool will update your objects with corrected data:
|
||||
The tool creates **three output files**:
|
||||
|
||||
### 1. Successful Songs (`source-success.json`)
|
||||
Array of successfully processed songs with MBIDs added:
|
||||
|
||||
```json
|
||||
[
|
||||
@ -283,39 +543,123 @@ The tool will update your objects with corrected data:
|
||||
]
|
||||
```
|
||||
|
||||
### 2. Failed Songs (`source-failure.json`)
|
||||
Array of songs that couldn't be processed (same format as source):
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"artist": "Unknown Artist",
|
||||
"title": "Unknown Song",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "12345678-1234-1234-1234-123456789012",
|
||||
"path": "z://MP4\\Unknown Artist - Unknown Song.mp4"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
### 3. Processing Report (`processing_report_YYYYMMDD_HHMMSS.txt`)
|
||||
Human-readable text report with statistics and failed song list:
|
||||
|
||||
```
|
||||
MusicBrainz Data Cleaner - Processing Report
|
||||
==================================================
|
||||
|
||||
Source File: data/songs.json
|
||||
Processing Date: 2024-12-19 14:30:22
|
||||
Processing Time: 15263.3 seconds
|
||||
|
||||
SUMMARY
|
||||
--------------------
|
||||
Total Songs Processed: 49,170
|
||||
Successful Songs: 40,692
|
||||
Failed Songs: 8,478
|
||||
Success Rate: 82.8%
|
||||
|
||||
DETAILED STATISTICS
|
||||
--------------------
|
||||
Artists Found: 44,526/49,170 (90.6%)
|
||||
Recordings Found: 40,998/49,170 (83.4%)
|
||||
Processing Speed: 3.2 songs/second
|
||||
|
||||
OUTPUT FILES
|
||||
--------------------
|
||||
Successful Songs: data/songs-success.json
|
||||
Failed Songs: data/songs-failure.json
|
||||
Report File: data/processing_report_20241219_143022.txt
|
||||
|
||||
FAILED SONGS (First 50)
|
||||
--------------------
|
||||
1. Unknown Artist - Unknown Song
|
||||
2. Invalid Artist - Invalid Title
|
||||
3. Test Artist - Test Song
|
||||
...
|
||||
```
|
||||
|
||||
## 🎬 Example Run
|
||||
|
||||
### Basic Processing
|
||||
```bash
|
||||
$ python musicbrainz_cleaner.py data/sample_songs.json
|
||||
$ docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main
|
||||
|
||||
Processing 3 songs...
|
||||
🚀 Starting song processing...
|
||||
📊 Total songs to process: 49,170
|
||||
Using database connection
|
||||
==================================================
|
||||
|
||||
[1/3] Processing: ACDC - Shot In The Dark
|
||||
🎯 Fuzzy match found: ACDC → AC/DC (score: 0.85)
|
||||
✅ Found artist: AC/DC (MBID: 66c662b6-6e2f-4930-8610-912e24c63ed1)
|
||||
🎯 Fuzzy match found: Shot In The Dark → Shot in the Dark (score: 0.92)
|
||||
✅ Found recording: Shot in the Dark (MBID: cf8b5cd0-d97c-413d-882f-fc422a2e57db)
|
||||
✅ Updated to: AC/DC - Shot in the Dark
|
||||
[1 of 49,170] ✅ PASS: ACDC - Shot In The Dark
|
||||
[2 of 49,170] ❌ FAIL: Unknown Artist - Unknown Song
|
||||
[3 of 49,170] ✅ PASS: Bruno Mars feat. Cardi B - Finesse (remix)
|
||||
[4 of 49,170] ✅ PASS: Taylor Swift - Love Story
|
||||
...
|
||||
|
||||
[2/3] Processing: Bruno Mars ft. Cardi B - Finesse Remix
|
||||
🎯 Fuzzy match found: Bruno Mars → Bruno Mars (score: 1.00)
|
||||
✅ Found artist: Bruno Mars (MBID: afb680f2-b6eb-4cd7-a70b-a63b25c763d5)
|
||||
🎯 Fuzzy match found: Finesse Remix → Finesse (remix) (score: 0.88)
|
||||
✅ Found recording: Finesse (remix) (MBID: 8ed14014-547a-4128-ab81-c2dca7ae198e)
|
||||
✅ Updated to: Bruno Mars feat. Cardi B - Finesse (remix)
|
||||
|
||||
[3/3] Processing: Taylor Swift - Love Story
|
||||
🎯 Fuzzy match found: Taylor Swift → Taylor Swift (score: 1.00)
|
||||
✅ Found artist: Taylor Swift (MBID: 20244d07-534f-4eff-b4d4-930878889970)
|
||||
🎯 Fuzzy match found: Love Story → Love Story (score: 1.00)
|
||||
✅ Found recording: Love Story (MBID: d783e6c5-761f-4fc3-bfcf-6089cdfc8f96)
|
||||
✅ Updated to: Taylor Swift - Love Story
|
||||
📈 Progress: 100/49,170 (0.2%) - Success: 85.0% - Rate: 3.2 songs/sec
|
||||
📈 Progress: 200/49,170 (0.4%) - Success: 87.5% - Rate: 3.1 songs/sec
|
||||
...
|
||||
|
||||
==================================================
|
||||
✅ Processing complete!
|
||||
📁 Output saved to: data/sample_songs_cleaned.json
|
||||
🎉 Processing completed!
|
||||
📊 Final Results:
|
||||
⏱️ Total processing time: 15263.3 seconds
|
||||
🚀 Average speed: 3.2 songs/second
|
||||
✅ Artists found: 44,526/49,170 (90.6%)
|
||||
✅ Recordings found: 40,998/49,170 (83.4%)
|
||||
❌ Failed songs: 8,478 (17.2%)
|
||||
📄 Files saved:
|
||||
✅ Successful songs: data/songs-success.json
|
||||
❌ Failed songs: data/songs-failure.json
|
||||
📋 Text report: data/processing_report_20241219_143022.txt
|
||||
📊 JSON report: data/processing_report_20241219_143022.json
|
||||
```
|
||||
|
||||
### Limited Processing
|
||||
```bash
|
||||
$ docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --limit 1000
|
||||
|
||||
⚠️ Limiting processing to first 1000 songs
|
||||
🚀 Starting song processing...
|
||||
📊 Total songs to process: 1,000
|
||||
Using database connection
|
||||
==================================================
|
||||
|
||||
[1 of 1,000] ✅ PASS: ACDC - Shot In The Dark
|
||||
[2 of 1,000] ❌ FAIL: Unknown Artist - Unknown Song
|
||||
...
|
||||
|
||||
==================================================
|
||||
🎉 Processing completed!
|
||||
📊 Final Results:
|
||||
⏱️ Total processing time: 312.5 seconds
|
||||
🚀 Average speed: 3.2 songs/second
|
||||
✅ Artists found: 856/1,000 (85.6%)
|
||||
✅ Recordings found: 789/1,000 (78.9%)
|
||||
❌ Failed songs: 211 (21.1%)
|
||||
📄 Files saved:
|
||||
✅ Successful songs: data/songs-success.json
|
||||
❌ Failed songs: data/songs-failure.json
|
||||
📋 Text report: data/processing_report_20241219_143022.txt
|
||||
📊 JSON report: data/processing_report_20241219_143022.json
|
||||
```
|
||||
|
||||
## 🔧 Troubleshooting
|
||||
@ -357,6 +701,13 @@ Using database connection
|
||||
- **NEW**: Verify the collaboration pattern is supported (ft., feat., featuring, &, and, ,)
|
||||
- **NEW**: Check case sensitivity - patterns are case-insensitive
|
||||
|
||||
### Using Tests for Troubleshooting
|
||||
- **FIRST STEP**: Check `src/tests/` directory for existing test files that might help
|
||||
- **DEBUG SCRIPTS**: Run `python3 src/tests/debug_artist_search.py` for artist search issues
|
||||
- **COLLABORATION ISSUES**: Check `src/tests/test_failed_collaborations.py` for collaboration examples
|
||||
- **DATABASE ISSUES**: Look at `src/tests/test_simple_query.py` for database connection patterns
|
||||
- **WORKING EXAMPLES**: Test files often contain working code that can be adapted for your issue
|
||||
|
||||
## 🎯 Use Cases
|
||||
|
||||
- **Karaoke Systems**: Clean up song metadata for better search and organization
|
||||
@ -451,6 +802,12 @@ This tool is provided as-is for educational and personal use.
|
||||
- **Fuzzy search thresholds** need tuning for different datasets
|
||||
- **Connection pooling** would improve performance for large datasets
|
||||
|
||||
### CLI Design
|
||||
- **Simplified interface** with smart defaults reduces complexity
|
||||
- **Array format consistency** makes output files easier to work with
|
||||
- **Human-readable reports** improve user experience
|
||||
- **Test file organization** keeps project structure clean
|
||||
|
||||
---
|
||||
|
||||
**Happy cleaning! 🎵✨**
|
||||
266
SETUP.md
Normal file
266
SETUP.md
Normal file
@ -0,0 +1,266 @@
|
||||
# MusicBrainz Cleaner Setup Guide
|
||||
|
||||
This guide will help you set up the MusicBrainz database and Docker services needed to run the cleaner.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Docker Desktop installed and running
|
||||
- At least 8GB of available RAM
|
||||
- At least 10GB of free disk space
|
||||
- Git (to clone the repositories)
|
||||
|
||||
## Step 1: Clone the MusicBrainz Server Repository
|
||||
|
||||
```bash
|
||||
# Clone the main MusicBrainz server repository (if not already done)
|
||||
git clone https://github.com/metabrainz/musicbrainz-server.git
|
||||
cd musicbrainz-server
|
||||
```
|
||||
|
||||
## Step 2: Start the MusicBrainz Docker Services
|
||||
|
||||
The MusicBrainz server uses Docker Compose to run multiple services including PostgreSQL, Solr search, Redis, and the web server.
|
||||
|
||||
```bash
|
||||
# Navigate to the musicbrainz-docker directory
|
||||
cd musicbrainz-docker
|
||||
|
||||
# Check if port 5000 is available (common conflict on macOS)
|
||||
lsof -i :5000
|
||||
|
||||
# If port 5000 is in use, use port 5001 instead
|
||||
MUSICBRAINZ_WEB_SERVER_PORT=5001 docker-compose up -d
|
||||
|
||||
# Or if port 5000 is free, use the default
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
### Troubleshooting Port Conflicts
|
||||
|
||||
If you get a port conflict error:
|
||||
|
||||
```bash
|
||||
# Kill any process using port 5000
|
||||
lsof -ti:5000 | xargs kill -9
|
||||
|
||||
# Or use a different port
|
||||
MUSICBRAINZ_WEB_SERVER_PORT=5001 docker-compose up -d
|
||||
```
|
||||
|
||||
### Troubleshooting Container Conflicts
|
||||
|
||||
If you get container name conflicts:
|
||||
|
||||
```bash
|
||||
# Remove existing containers
|
||||
docker-compose down
|
||||
|
||||
# Force remove conflicting containers
|
||||
docker rm -f musicbrainz-docker-db-1
|
||||
|
||||
# Start fresh
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
## Step 3: Wait for Services to Start
|
||||
|
||||
The services take time to initialize, especially the database:
|
||||
|
||||
```bash
|
||||
# Check service status
|
||||
docker-compose ps
|
||||
|
||||
# Wait for all services to be healthy (this can take 5-10 minutes)
|
||||
docker-compose logs -f db
|
||||
```
|
||||
|
||||
**Important**: Wait until you see database initialization complete messages before proceeding.
|
||||
|
||||
## Step 4: Verify Services Are Running
|
||||
|
||||
```bash
|
||||
# Check all containers are running
|
||||
docker-compose ps
|
||||
|
||||
# Test the web interface (if using port 5001)
|
||||
curl http://localhost:5001
|
||||
|
||||
# Or if using default port 5000
|
||||
curl http://localhost:5000
|
||||
```
|
||||
|
||||
## Step 5: Set Environment Variables
|
||||
|
||||
Create a `.env` file in the `musicbrainz-cleaner` directory:
|
||||
|
||||
```bash
|
||||
cd ../musicbrainz-cleaner
|
||||
|
||||
# Create .env file
|
||||
cat > .env << EOF
|
||||
# Database connection (default Docker setup)
|
||||
DB_HOST=db
|
||||
DB_PORT=5432
|
||||
DB_NAME=musicbrainz_db
|
||||
DB_USER=musicbrainz
|
||||
DB_PASSWORD=musicbrainz
|
||||
|
||||
# MusicBrainz web server
|
||||
MUSICBRAINZ_WEB_SERVER_PORT=5001
|
||||
EOF
|
||||
```
|
||||
|
||||
**Note**: If you used the default port 5000, change `MUSICBRAINZ_WEB_SERVER_PORT=5001` to `MUSICBRAINZ_WEB_SERVER_PORT=5000`.
|
||||
|
||||
## Step 6: Test the Connection
|
||||
|
||||
```bash
|
||||
# Run a simple test to verify everything is working
|
||||
docker-compose run --rm musicbrainz-cleaner python3 quick_test_20.py
|
||||
```
|
||||
|
||||
## Service Details
|
||||
|
||||
The Docker Compose setup includes:
|
||||
|
||||
- **PostgreSQL Database** (`db`): Main MusicBrainz database
|
||||
- **Solr Search** (`search`): Full-text search engine
|
||||
- **Redis** (`redis`): Caching and session storage
|
||||
- **Message Queue** (`mq`): Background job processing
|
||||
- **MusicBrainz Web Server** (`musicbrainz`): Main web application
|
||||
- **Indexer** (`indexer`): Search index maintenance
|
||||
|
||||
## Ports Used
|
||||
|
||||
- **5000/5001**: MusicBrainz web server (configurable)
|
||||
- **5432**: PostgreSQL database (internal)
|
||||
- **8983**: Solr search (internal)
|
||||
- **6379**: Redis (internal)
|
||||
- **5672**: Message queue (internal)
|
||||
|
||||
## Stopping Services
|
||||
|
||||
```bash
|
||||
# Stop all services
|
||||
cd ../musicbrainz-docker
|
||||
docker-compose down
|
||||
|
||||
# To also remove volumes (WARNING: this deletes all data)
|
||||
docker-compose down -v
|
||||
```
|
||||
|
||||
## Restarting Services
|
||||
|
||||
```bash
|
||||
# Restart all services
|
||||
docker-compose restart
|
||||
|
||||
# Or restart specific service
|
||||
docker-compose restart db
|
||||
```
|
||||
|
||||
## Monitoring Services
|
||||
|
||||
```bash
|
||||
# View logs for all services
|
||||
docker-compose logs -f
|
||||
|
||||
# View logs for specific service
|
||||
docker-compose logs -f db
|
||||
docker-compose logs -f musicbrainz
|
||||
|
||||
# Check resource usage
|
||||
docker stats
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Database Connection Issues
|
||||
|
||||
```bash
|
||||
# Check if database is running
|
||||
docker-compose ps db
|
||||
|
||||
# Check database logs
|
||||
docker-compose logs db
|
||||
|
||||
# Test database connection
|
||||
docker-compose exec db psql -U musicbrainz -d musicbrainz_db -c "SELECT 1;"
|
||||
```
|
||||
|
||||
### Memory Issues
|
||||
|
||||
If you encounter memory issues:
|
||||
|
||||
```bash
|
||||
# Increase Docker memory limit in Docker Desktop settings
|
||||
# Recommended: 8GB minimum, 16GB preferred
|
||||
|
||||
# Check current memory usage
|
||||
docker stats
|
||||
```
|
||||
|
||||
### Platform Issues (Apple Silicon)
|
||||
|
||||
If you're on Apple Silicon (M1/M2) and see platform warnings:
|
||||
|
||||
```bash
|
||||
# The services will still work, but you may see warnings about platform mismatch
|
||||
# This is normal and doesn't affect functionality
|
||||
```
|
||||
|
||||
## Performance Tips
|
||||
|
||||
1. **Allocate sufficient memory** to Docker Desktop (8GB+ recommended)
|
||||
2. **Use SSD storage** for better database performance
|
||||
3. **Close other resource-intensive applications** while running the services
|
||||
4. **Wait for full initialization** before running tests
|
||||
|
||||
## Next Steps
|
||||
|
||||
Once the services are running successfully:
|
||||
|
||||
1. Run the quick test: `python3 quick_test_20.py`
|
||||
2. Run larger tests: `python3 bulk_test_1000.py`
|
||||
3. Use the cleaner on your own data: `python3 -m src.cli.main --source your_file.json --output-success cleaned.json --output-failure failed.json`
|
||||
|
||||
## 🔄 After System Reboot
|
||||
|
||||
After restarting your Mac, you'll need to restart the MusicBrainz services:
|
||||
|
||||
### Quick Restart (Recommended)
|
||||
```bash
|
||||
# Navigate to musicbrainz-cleaner directory
|
||||
cd /path/to/musicbrainz-server/musicbrainz-cleaner
|
||||
|
||||
# If Docker Desktop is already running
|
||||
./restart_services.sh
|
||||
|
||||
# Or manually
|
||||
cd ../musicbrainz-docker && MUSICBRAINZ_WEB_SERVER_PORT=5001 docker-compose up -d
|
||||
```
|
||||
|
||||
### Full Restart (If you have issues)
|
||||
```bash
|
||||
# Complete setup including Docker checks
|
||||
./start_services.sh
|
||||
```
|
||||
|
||||
### Auto-start Setup (Optional)
|
||||
1. **Enable Docker Desktop auto-start**:
|
||||
- Open Docker Desktop
|
||||
- Go to Settings → General
|
||||
- Check "Start Docker Desktop when you log in"
|
||||
|
||||
2. **Then just run**: `./restart_services.sh` after each reboot
|
||||
|
||||
**Note**: Your data is preserved in Docker volumes, so you don't need to reconfigure anything after a reboot.
|
||||
|
||||
## Support
|
||||
|
||||
If you encounter issues:
|
||||
|
||||
1. Check the logs: `docker-compose logs -f`
|
||||
2. Verify Docker has sufficient resources
|
||||
3. Ensure all prerequisites are met
|
||||
4. Try restarting the services: `docker-compose restart`
|
||||
214
TROUBLESHOOTING.md
Normal file
214
TROUBLESHOOTING.md
Normal file
@ -0,0 +1,214 @@
|
||||
# 🚨 MusicBrainz Cleaner Troubleshooting Guide
|
||||
|
||||
This guide documents common issues encountered when starting and running the MusicBrainz cleaner, along with their solutions.
|
||||
|
||||
## 📋 Key Files for New Sessions
|
||||
|
||||
When starting a new chat session, reference these files in order:
|
||||
|
||||
1. **`README.md`** - Quick start guide and basic usage
|
||||
2. **`PRD.md`** - Technical specifications and requirements
|
||||
3. **`SETUP.md`** - Detailed setup instructions
|
||||
4. **`TROUBLESHOOTING.md`** - This file - common issues and solutions
|
||||
5. **`start_services.sh`** - Automated service startup script
|
||||
6. **`restart_services.sh`** - Quick restart script for after reboots
|
||||
|
||||
## Quick Diagnostic Commands
|
||||
|
||||
```bash
|
||||
# Check if Docker is running
|
||||
docker --version
|
||||
|
||||
# Check container status
|
||||
cd ../musicbrainz-docker && docker-compose ps
|
||||
|
||||
# Check database logs
|
||||
docker-compose logs db | tail -10
|
||||
|
||||
# Check web server logs
|
||||
docker-compose logs musicbrainz | tail -10
|
||||
|
||||
# Test web server response
|
||||
curl -s http://localhost:5001 | head -5
|
||||
|
||||
# Test database connection
|
||||
docker-compose exec db psql -U musicbrainz -d musicbrainz_db -c "SELECT COUNT(*) FROM artist;"
|
||||
|
||||
# Test cleaner connection
|
||||
cd ../musicbrainz-cleaner && docker-compose run --rm musicbrainz-cleaner python3 -c "from src.api.database import MusicBrainzDatabase; db = MusicBrainzDatabase(); print('Connection result:', db.connect())"
|
||||
```
|
||||
|
||||
## Common Issues & Solutions
|
||||
|
||||
### 🚫 Issue 1: Database Connection Refused
|
||||
|
||||
**Error Message:**
|
||||
```
|
||||
Connection error: connection to server at "172.18.0.2", port 5432 failed: Connection refused
|
||||
```
|
||||
|
||||
**Root Cause:** Database container not fully initialized or wrong host configuration
|
||||
|
||||
**Solutions:**
|
||||
|
||||
1. **Wait for database initialization:**
|
||||
```bash
|
||||
cd ../musicbrainz-docker
|
||||
docker-compose logs db | grep "database system is ready"
|
||||
```
|
||||
|
||||
2. **Fix host configuration in database.py:**
|
||||
```python
|
||||
# Change this line in src/api/database.py:
|
||||
host='172.18.0.2' # ❌ Wrong
|
||||
host='db' # ✅ Correct
|
||||
```
|
||||
|
||||
3. **Verify database is ready:**
|
||||
```bash
|
||||
docker-compose exec db psql -U musicbrainz -d musicbrainz_db -c "SELECT COUNT(*) FROM artist;"
|
||||
```
|
||||
|
||||
### 🚫 Issue 2: Test Shows 0% Success Rate
|
||||
|
||||
**Symptoms:** Test script reports 0% success despite finding artists in logs
|
||||
|
||||
**Root Cause:** Test script logic error - checking `'mbid' in result` where `result` is a tuple
|
||||
|
||||
**Solution:** Fix test script to extract song dictionary from tuple:
|
||||
```python
|
||||
# Wrong:
|
||||
artist_found = 'mbid' in result
|
||||
|
||||
# Correct:
|
||||
cleaned_song, success = result
|
||||
artist_found = 'mbid' in cleaned_song
|
||||
```
|
||||
|
||||
### 🚫 Issue 3: Port Already in Use
|
||||
|
||||
**Error Message:**
|
||||
```
|
||||
ports are not available: exposing port TCP 0.0.0.0:5000 ... bind: address already in use
|
||||
```
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# Kill process using port 5000
|
||||
lsof -ti:5000 | xargs kill -9
|
||||
|
||||
# Or use alternative port
|
||||
MUSICBRAINZ_WEB_SERVER_PORT=5001 docker-compose up -d
|
||||
```
|
||||
|
||||
### 🚫 Issue 4: Container Name Conflicts
|
||||
|
||||
**Error Message:**
|
||||
```
|
||||
Conflict. The container name ... is already in use
|
||||
```
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# Stop and remove existing containers
|
||||
docker-compose down
|
||||
|
||||
# Force remove specific container if needed
|
||||
docker rm -f <container_name>
|
||||
|
||||
# Restart services
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
### 🚫 Issue 5: Docker Not Running
|
||||
|
||||
**Error Message:**
|
||||
```
|
||||
Cannot connect to the Docker daemon
|
||||
```
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# Start Docker Desktop
|
||||
open -a Docker
|
||||
|
||||
# Wait for Docker to start, then restart services
|
||||
./restart_services.sh
|
||||
```
|
||||
|
||||
### 🚫 Issue 6: API Returns Empty Results
|
||||
|
||||
**Symptoms:** API calls return empty results even though database has data
|
||||
|
||||
**Root Cause:** MusicBrainz web server not fully initialized
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# Wait for web server to be ready
|
||||
sleep 60
|
||||
|
||||
# Test API response
|
||||
curl -s "http://localhost:5001/ws/2/artist/?query=name:The%20Beatles&fmt=json"
|
||||
```
|
||||
|
||||
## Startup Checklist
|
||||
|
||||
Before running any tests, verify:
|
||||
|
||||
1. ✅ **Docker Desktop is running**
|
||||
2. ✅ **All containers are up:** `docker-compose ps`
|
||||
3. ✅ **Database is ready:** `docker-compose logs db | grep "ready"`
|
||||
4. ✅ **Web server responds:** `curl -s http://localhost:5001`
|
||||
5. ✅ **Database has data:** Check artist count in database
|
||||
6. ✅ **Cleaner can connect:** Test database connection in cleaner
|
||||
|
||||
## Performance Issues
|
||||
|
||||
### Slow Processing
|
||||
- **Cause:** Database queries taking too long
|
||||
- **Solution:** Ensure database has proper indexes and is fully loaded
|
||||
|
||||
### Memory Issues
|
||||
- **Cause:** Docker Desktop memory allocation too low
|
||||
- **Solution:** Increase Docker Desktop memory to 8GB+
|
||||
|
||||
### Platform Warnings
|
||||
- **Cause:** Apple Silicon (M1/M2) platform mismatch
|
||||
- **Solution:** These warnings can be ignored - services work correctly
|
||||
|
||||
## Recovery Procedures
|
||||
|
||||
### Complete Reset
|
||||
```bash
|
||||
# Stop all services
|
||||
cd ../musicbrainz-docker && docker-compose down
|
||||
|
||||
# Remove all containers and volumes (⚠️ WARNING: This deletes data)
|
||||
docker-compose down -v
|
||||
|
||||
# Restart from scratch
|
||||
cd ../musicbrainz-cleaner && ./start_services.sh
|
||||
```
|
||||
|
||||
### Quick Restart
|
||||
```bash
|
||||
# Quick restart (preserves data)
|
||||
./restart_services.sh
|
||||
```
|
||||
|
||||
## Getting Help
|
||||
|
||||
If you encounter issues not covered in this guide:
|
||||
|
||||
1. Check the logs: `docker-compose logs -f`
|
||||
2. Verify system requirements are met
|
||||
3. Try the complete reset procedure
|
||||
4. Check the main README.md for additional troubleshooting steps
|
||||
|
||||
## Prevention Tips
|
||||
|
||||
1. **Always use the startup scripts** (`start_services.sh` or `restart_services.sh`)
|
||||
2. **Wait for services to fully initialize** before running tests
|
||||
3. **Use the startup checklist** before running any tests
|
||||
4. **Keep Docker Desktop memory allocation** at 8GB or higher
|
||||
5. **Use port 5001** if port 5000 is busy
|
||||
17191
data/artist_lookup.json
Normal file
17191
data/artist_lookup.json
Normal file
File diff suppressed because it is too large
Load Diff
88
data/contraction_fixes.json
Executable file
88
data/contraction_fixes.json
Executable file
@ -0,0 +1,88 @@
|
||||
{
|
||||
"dont": "don't",
|
||||
"don t": "don't",
|
||||
"cant": "can't",
|
||||
"can t": "can't",
|
||||
"wont": "won't",
|
||||
"won t": "won't",
|
||||
"im": "I'm",
|
||||
"i m": "I'm",
|
||||
"ive": "I've",
|
||||
"i ve": "I've",
|
||||
"id": "I'd",
|
||||
"i d": "I'd",
|
||||
"ill": "I'll",
|
||||
"i ll": "I'll",
|
||||
"isnt": "isn't",
|
||||
"isn t": "isn't",
|
||||
"arent": "aren't",
|
||||
"aren t": "aren't",
|
||||
"wasnt": "wasn't",
|
||||
"wasn t": "wasn't",
|
||||
"werent": "weren't",
|
||||
"weren t": "weren't",
|
||||
"hasnt": "hasn't",
|
||||
"hasn t": "hasn't",
|
||||
"havent": "haven't",
|
||||
"haven t": "haven't",
|
||||
"shouldnt": "shouldn't",
|
||||
"shouldn t": "shouldn't",
|
||||
"couldnt": "couldn't",
|
||||
"couldn t": "couldn't",
|
||||
"wouldnt": "wouldn't",
|
||||
"wouldn t": "wouldn't",
|
||||
"didnt": "didn't",
|
||||
"didn t": "didn't",
|
||||
"theyre": "they're",
|
||||
"they re": "they're",
|
||||
"youre": "you're",
|
||||
"you re": "you're",
|
||||
"whos": "who's",
|
||||
"who s": "who's",
|
||||
"whats": "what's",
|
||||
"what s": "what's",
|
||||
"thats": "that's",
|
||||
"that s": "that's",
|
||||
"lets": "let's",
|
||||
"let s": "let's",
|
||||
"theres": "there's",
|
||||
"there s": "there's",
|
||||
"heres": "here's",
|
||||
"here s": "here's",
|
||||
"hows": "how's",
|
||||
"how s": "how's",
|
||||
"shes": "she's",
|
||||
"she s": "she's",
|
||||
"hes": "he's",
|
||||
"he s": "he's",
|
||||
"were": "we're",
|
||||
"we re": "we're",
|
||||
"weve": "we've",
|
||||
"we ve": "we've",
|
||||
"well": "we'll",
|
||||
"we ll": "we'll",
|
||||
"its": "it's",
|
||||
"it s": "it's",
|
||||
"itll": "it'll",
|
||||
"it ll": "it'll",
|
||||
"yall": "y'all",
|
||||
"y all": "y'all",
|
||||
"wouldve": "would've",
|
||||
"would ve": "would've",
|
||||
"couldve": "could've",
|
||||
"could ve": "could've",
|
||||
"shouldve": "should've",
|
||||
"should ve": "should've",
|
||||
"mightve": "might've",
|
||||
"might ve": "might've",
|
||||
"mustve": "must've",
|
||||
"must ve": "must've",
|
||||
"maam": "ma'am",
|
||||
"ma am": "ma'am",
|
||||
"oclock": "o'clock",
|
||||
"o clock": "o'clock",
|
||||
"aint": "ain't",
|
||||
"ain t": "ain't",
|
||||
"rocknroll": "rock 'n' roll",
|
||||
"rock n roll": "rock 'n' roll"
|
||||
}
|
||||
@ -5,10 +5,249 @@
|
||||
"ft ": "feat. ",
|
||||
"featuring": "feat."
|
||||
},
|
||||
"groups": [
|
||||
"Abbott & Costello",
|
||||
"Adam & Joe",
|
||||
"Al & Smoot",
|
||||
"Alfie & Owen",
|
||||
"Allen & Grier",
|
||||
"Amos 'n' Andy",
|
||||
"Andrews Sisters",
|
||||
"AnnenMayKantereit",
|
||||
"Ashford & Simpson",
|
||||
"B. J. Thomas & The Triumphs",
|
||||
"Bachman-Turner Overdrive",
|
||||
"Baden & Vin",
|
||||
"Barenaked Ladies",
|
||||
"Barnum & Bailey",
|
||||
"Barry, Robin and Maurice Gibb",
|
||||
"Beavis & Butt-Head",
|
||||
"Ben & Ben",
|
||||
"Benny & Joon",
|
||||
"Bert & Ernie",
|
||||
"Bill & Gloria Gaither",
|
||||
"Bill & Ted",
|
||||
"Bill Haley and His Comets",
|
||||
"Billy & The Essentials",
|
||||
"Bob & Earl",
|
||||
"Bob and Tom",
|
||||
"Bobby & The Midnites",
|
||||
"Booker T. & the M.G.'s",
|
||||
"Brooks & Dunn",
|
||||
"Brown & Carney",
|
||||
"Bucky & John Pizzarelli",
|
||||
"Buddy & Julie Miller",
|
||||
"Buddy Holly and The Crickets",
|
||||
"Burt & The Backbeats",
|
||||
"Captain & Tennille",
|
||||
"Carly and Lucy Simon",
|
||||
"Carpenter & Ford",
|
||||
"Cece & Bebe Winans",
|
||||
"Chad & Jeremy",
|
||||
"Charles & Eddie",
|
||||
"Cheech & Chong",
|
||||
"Cheryl & Pam Johnson",
|
||||
"Chris & Cosey",
|
||||
"Chris & Rich Robinson",
|
||||
"Climax Blues Band",
|
||||
"Crosby, Stills & Nash",
|
||||
"Crosby, Stills, Nash & Young",
|
||||
"Darrell and Donny",
|
||||
"Daryl Hall & John Oates",
|
||||
"Dave & Ansell Collins",
|
||||
"Dave and Sugar",
|
||||
"David & David",
|
||||
"Dawn and Tony Orlando",
|
||||
"Dean & Britta",
|
||||
"Dean & Jerry",
|
||||
"Deke Dickerson & The Ecco-Fonics",
|
||||
"Dick & Dee Dee",
|
||||
"Dion and The Belmonts",
|
||||
"DJ Jazzy Jeff & The Fresh Prince",
|
||||
"Don and Juan",
|
||||
"Dr. Hook & The Medicine Show",
|
||||
"Ed & Steve Masley",
|
||||
"Eddie and The Hot Rods",
|
||||
"Emerson, Lake and Palmer",
|
||||
"Emerson, Lake and Powell",
|
||||
"England Dan & John Ford Coley",
|
||||
"Eric & The Good Good Feeling",
|
||||
"Eric B. & Rakim",
|
||||
"Ernie & Bert",
|
||||
"Eurythmics",
|
||||
"Everly Brothers",
|
||||
"Fischer-Z",
|
||||
"Fleming & John",
|
||||
"Flo & Eddie",
|
||||
"Foster & Lloyd",
|
||||
"Fred and Ginger",
|
||||
"Funk Brothers",
|
||||
"Gabe & Chloe",
|
||||
"Gallagher & Lyle",
|
||||
"Gamble & Huff",
|
||||
"Gene and Debbe",
|
||||
"George and Ira Gershwin",
|
||||
"Gerry and the Pacemakers",
|
||||
"Gogol Bordello",
|
||||
"Hall & Oates",
|
||||
"Hank & Frank",
|
||||
"Hank and Lewie Wickham",
|
||||
"Hank Williams Jr. and The Cheatin' Hearts",
|
||||
"Hankshaw and Fanny",
|
||||
"Hothouse Flowers",
|
||||
"Hues Corporation",
|
||||
"Ike & Tina Turner",
|
||||
"Jack & Jack",
|
||||
"James and Bobby Purify",
|
||||
"Jan & Dean",
|
||||
"Jane and Herondale",
|
||||
"Janie and The Greyhounds",
|
||||
"Jay and The Americans",
|
||||
"Jay and The Techniques",
|
||||
"Jermaine & Pia",
|
||||
"Jerry Lee Lewis and His Pumping Piano",
|
||||
"Jett & The Blackhearts",
|
||||
"Jody & The Jerms",
|
||||
"Joe and Eddie",
|
||||
"John & Jacob",
|
||||
"John and Mary",
|
||||
"John and Yoko",
|
||||
"Johnnie and Jack",
|
||||
"Johnny and The Hurricanes",
|
||||
"Johnny and The Moondogs",
|
||||
"Jon & Vangelis",
|
||||
"Jonas Brothers",
|
||||
"Josh and The Empty Pockets",
|
||||
"June and Johnny",
|
||||
"K-Ci & JoJo",
|
||||
"KC & The Sunshine Band",
|
||||
"Kate and Anna McGarrigle",
|
||||
"Kid 'n Play",
|
||||
"Kiki & Herb",
|
||||
"Kim & The Cadillacs",
|
||||
"Kingston Trio",
|
||||
"Kool & The Gang",
|
||||
"Ladysmith Black Mambazo",
|
||||
"Laurie & The Sighs",
|
||||
"Lee and Sara",
|
||||
"Les Paul & Mary Ford",
|
||||
"Loggins & Messina",
|
||||
"Lulu & The Luvvers",
|
||||
"Macklemore & Ryan Lewis",
|
||||
"Manny & The Casanovas",
|
||||
"Marcy & Zina",
|
||||
"Mark & Almond",
|
||||
"Mark & Clark Band",
|
||||
"Marty & Elayne",
|
||||
"Max & Mitch",
|
||||
"Mick & Keith",
|
||||
"Mickey & Sylvia",
|
||||
"Mike & The Mechanics",
|
||||
"Mills Brothers",
|
||||
"Moe & Joe",
|
||||
"Monty & The Pythons",
|
||||
"Mumford & Sons",
|
||||
"Nate and James",
|
||||
"Nico & Vinz",
|
||||
"O'Day & Simone",
|
||||
"Otis & Carla",
|
||||
"OutKast",
|
||||
"Paul & Linda McCartney",
|
||||
"Paul & Paula",
|
||||
"Peaches & Herb",
|
||||
"Peter & Gordon",
|
||||
"Peter, Paul and Mary",
|
||||
"Petty & The Heartbreakers",
|
||||
"Phil & The Firecat",
|
||||
"Phillips & Drew",
|
||||
"Pomus & Shuman",
|
||||
"Punch Brothers",
|
||||
"Randy and The Rainbows",
|
||||
"Ray & Anita",
|
||||
"Red and Kathy",
|
||||
"Reid & Simone",
|
||||
"Richie & The Young Lions",
|
||||
"Ricky and The Rockets",
|
||||
"Righteous Brothers",
|
||||
"Rob & Fab",
|
||||
"Robert and Johnny",
|
||||
"Rodgers and Hammerstein",
|
||||
"Rodgers and Hart",
|
||||
"Roe & Burns",
|
||||
"Rufus & Chaka Khan",
|
||||
"Sam & Dave",
|
||||
"Santo & Johnny",
|
||||
"Sasha & John Digweed",
|
||||
"Scott & Aimee",
|
||||
"Seals & Crofts",
|
||||
"Sergio & Odair Assad",
|
||||
"Sha Na Na",
|
||||
"Shemekia Copeland & The Alligators",
|
||||
"Shirley and Lee",
|
||||
"Simon & Garfunkel",
|
||||
"Solas",
|
||||
"Sonny & Cher",
|
||||
"Steeleye Span",
|
||||
"Steely Dan",
|
||||
"Steve & Eydie",
|
||||
"Syd & The Strawbs",
|
||||
"Tammi & Marvin",
|
||||
"The Avett Brothers",
|
||||
"The B-52s",
|
||||
"The Bacon Brothers",
|
||||
"The Bellamy Brothers",
|
||||
"The Blackwood Brothers",
|
||||
"The Blues Brothers",
|
||||
"The Carter Family",
|
||||
"The Chemical Brothers",
|
||||
"The Chuck Wagon Gang",
|
||||
"The Clark Sisters",
|
||||
"The Coasters",
|
||||
"The Cook Brothers",
|
||||
"The Delmore Brothers",
|
||||
"The Dillards",
|
||||
"The Doobie Brothers",
|
||||
"The Everly Brothers",
|
||||
"The Flying Burrito Brothers",
|
||||
"The Gatlin Brothers",
|
||||
"The Isley Brothers",
|
||||
"The Judds",
|
||||
"The Kalin Twins",
|
||||
"The Louvin Brothers",
|
||||
"The Madden Brothers",
|
||||
"The Neville Brothers",
|
||||
"The Oak Ridge Boys",
|
||||
"The Osborne Brothers",
|
||||
"The Pointer Sisters",
|
||||
"The Proclaimers",
|
||||
"The Stanley Brothers",
|
||||
"The Statler Brothers",
|
||||
"The Tamperer featuring Maya",
|
||||
"The Walker Brothers",
|
||||
"The Wilburn Brothers",
|
||||
"Thompson Twins",
|
||||
"Tim & Mollie O'Brien",
|
||||
"Tom and Jerry",
|
||||
"Tom and Viv",
|
||||
"Tom Petty & The Heartbreakers",
|
||||
"Tommy and The Twilights",
|
||||
"Tony and The Graduates",
|
||||
"Tucker & Tanya",
|
||||
"Vera & The Beaters",
|
||||
"Vince & Linda",
|
||||
"Waylon & Willie",
|
||||
"Wendy & Lisa",
|
||||
"Wham!",
|
||||
"Will & The Bushmen",
|
||||
"Willie & The Poor Boys",
|
||||
"Wilson Phillips",
|
||||
"Womack & Womack",
|
||||
"Zapp & Roger"
|
||||
],
|
||||
"metadata": {
|
||||
"description": "Name variations for artist name normalization",
|
||||
"description": "Name variations and known band names for artist name normalization",
|
||||
"last_updated": "2024-12-19",
|
||||
"version": "3.0",
|
||||
"note": "Removed artists section - now using database search instead"
|
||||
"version": "3.1",
|
||||
"note": "Added groups section with 200+ known band names that should not be split as collaborations"
|
||||
}
|
||||
}
|
||||
@ -1,29 +0,0 @@
|
||||
{
|
||||
"recordings": [
|
||||
{
|
||||
"artist_mbid": "66c662b6-6e2f-4930-8610-912e24c63ed1",
|
||||
"title": "Shot in the Dark",
|
||||
"recording_mbid": "cf8b5cd0-d97c-413d-882f-fc422a2e57db"
|
||||
},
|
||||
{
|
||||
"artist_mbid": "afb680f2-b6eb-4cd7-a70b-a63b25c763d5",
|
||||
"title": "Finesse",
|
||||
"recording_mbid": "8ed14014-547a-4128-ab81-c2dca7ae198e"
|
||||
},
|
||||
{
|
||||
"artist_mbid": "afb680f2-b6eb-4cd7-a70b-a63b25c763d5",
|
||||
"title": "Finesse Remix",
|
||||
"recording_mbid": "8ed14014-547a-4128-ab81-c2dca7ae198e"
|
||||
},
|
||||
{
|
||||
"artist_mbid": "20244d07-534f-4eff-b4d4-930878889970",
|
||||
"title": "Love Story",
|
||||
"recording_mbid": "d783e6c5-761f-4fc3-bfcf-6089cdfc8f96"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"description": "Known recording MBIDs for common songs",
|
||||
"last_updated": "2024-01-01",
|
||||
"version": "1.0"
|
||||
}
|
||||
}
|
||||
@ -1,26 +0,0 @@
|
||||
[
|
||||
{
|
||||
"artist": "ACDC",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "8946008c-7acc-d187-60e6-5286e55ad502",
|
||||
"path": "z://MP4\\ACDC - Shot In The Dark (Karaoke Version).mp4",
|
||||
"title": "Shot In The Dark"
|
||||
},
|
||||
{
|
||||
"artist": "Bruno Mars ft. Cardi B",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "946a1077-ab9e-300c-3a72-b1e141e9706f",
|
||||
"path": "z://MP4\\Bruno Mars ft. Cardi B - Finesse Remix (Karaoke Version).mp4",
|
||||
"title": "Finesse Remix"
|
||||
},
|
||||
{
|
||||
"artist": "Taylor Swift",
|
||||
"disabled": false,
|
||||
"favorite": true,
|
||||
"guid": "12345678-1234-1234-1234-123456789012",
|
||||
"path": "z://MP4\\Taylor Swift - Love Story (Karaoke Version).mp4",
|
||||
"title": "Love Story"
|
||||
}
|
||||
]
|
||||
2599
data/songs.json
Executable file → Normal file
2599
data/songs.json
Executable file → Normal file
File diff suppressed because it is too large
Load Diff
@ -1,835 +0,0 @@
|
||||
[
|
||||
{
|
||||
"artist": "One Night Only",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "c789a741-923a-8c8f-1a5a-1fdb7399f12b",
|
||||
"path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF265\\One Night Only - Just For Tonight - SF265 - 14.mp3",
|
||||
"title": "Just For Tonight"
|
||||
},
|
||||
{
|
||||
"artist": "Owl City",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "65643569-49f5-8cca-aba0-939a52bc462f",
|
||||
"path": "z://MP4\\KaraFun Karaoke\\Karaoke Vanilla Twilight - Owl City.mp4",
|
||||
"title": "Vanilla Twilight"
|
||||
},
|
||||
{
|
||||
"artist": "Big Mountain",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "89b50784-1b62-ee66-1dee-0619a59c0c1c",
|
||||
"path": "z://MP4\\KaraFun Karaoke\\Karaoke Baby, I Love Your Way - Big Mountain.mp4",
|
||||
"title": "Baby, I Love Your Way"
|
||||
},
|
||||
{
|
||||
"artist": "Steppenwolf",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "d2b41e7d-f96c-0907-ce25-920dd91fb921",
|
||||
"path": "z://CDG\\Various\\Steppenwolf - Born To Be Wild (3).mp3",
|
||||
"title": "Born To Be Wild (3)"
|
||||
},
|
||||
{
|
||||
"artist": "Beyonc\u00e9",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "ac14f3d1-ff20-3ac1-0191-905de8a99877",
|
||||
"path": "z://MP4\\Sing King Karaoke\\Beyonc\u00e9 - 7 11 (Karaoke Version).mp4",
|
||||
"title": "7 11"
|
||||
},
|
||||
{
|
||||
"artist": "Foreigner",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "9bf3fabf-6957-4241-dd1c-6dba29d83655",
|
||||
"path": "z://MP4\\Let's Sing Karaoke\\Foreigner - I Want To Know What Love Is (Karaoke & Lyrics) (2).mp4",
|
||||
"title": "I Want To Know What Love Is"
|
||||
},
|
||||
{
|
||||
"artist": "Liam Gallagher",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "e874f895-e946-ef4d-354e-8c33af50be71",
|
||||
"path": "z://MP4\\KtvEntertainment\\Liam Gallagher - For What It's Worth Karaoke.mp4",
|
||||
"title": "For What It's Worth"
|
||||
},
|
||||
{
|
||||
"artist": "George Michael",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "459f42c2-9aa4-8eac-e224-b01c5dd23f92",
|
||||
"path": "z://MP4\\Karaoke Sing Sing\\George Michael - Careless Whisper (Karaoke Version).mp4",
|
||||
"title": "Careless Whisper"
|
||||
},
|
||||
{
|
||||
"artist": "Queens Of The Stone Age",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "c5a2429d-c54d-f982-4bf3-2eb7eeb1cead",
|
||||
"path": "z://CDG\\Various\\Queens Of The Stone Age - Little Sister.mp3",
|
||||
"title": "Little Sister"
|
||||
},
|
||||
{
|
||||
"artist": "Rocketman",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "6a3c0389-9b7b-b6aa-a3eb-1cd8b0524f12",
|
||||
"path": "z://MP4\\KaraFun Karaoke\\Karaoke The Bitch is Back - Rocketman.mp4",
|
||||
"title": "The Bitch is Back"
|
||||
},
|
||||
{
|
||||
"artist": "Bobby Vee",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "fa4dea55-647c-5efa-534f-f0bcbbd5414b",
|
||||
"path": "z://CDG\\Sunfly Collection\\Sunfly Most Wanted\\SFMW 813\\Bobby Vee - Run To Him - SFMW 813 -05.mp3",
|
||||
"title": "Run To Him"
|
||||
},
|
||||
{
|
||||
"artist": "Sheryl Crow",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "029cdcce-6813-5b7f-8a60-ebf522dc4c77",
|
||||
"path": "z://CDG\\Various\\Sheryl Crow - Run Baby Run.mp3",
|
||||
"title": "Run Baby Run"
|
||||
},
|
||||
{
|
||||
"artist": "Lady Gaga",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "47bb91da-fc18-b97d-72b1-32ca1d847bd5",
|
||||
"path": "z://CDG\\Mr Entertainer\\MRTC01\\MRTC01-04\\MRTC01-4-09 - Lady Gaga - Poker Face.mp3",
|
||||
"title": "Poker Face"
|
||||
},
|
||||
{
|
||||
"artist": "Cliff Richard",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "9586c5d0-538b-8351-2f87-9bee5e6ce827",
|
||||
"path": "z://MP4\\KaraFun Karaoke\\Karaoke Rave On - Cliff Richard.mp4",
|
||||
"title": "Rave On"
|
||||
},
|
||||
{
|
||||
"artist": "Glen Campbell",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "0734fdc9-73a6-842c-14f6-36f643c12e99",
|
||||
"path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF051\\Glen Campbell - Rhinestone Cowboy - SF051 - 15.mp3",
|
||||
"title": "Rhinestone Cowboy"
|
||||
},
|
||||
{
|
||||
"artist": "Chesney Hawkes",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "40f504ff-dd2e-5ebe-78ce-c7445402c30a",
|
||||
"path": "z://MP4\\ZoomKaraokeOfficial\\Chesney Hawkes - The One And Only.mp4",
|
||||
"title": "The One And Only"
|
||||
},
|
||||
{
|
||||
"artist": "Tony Orlando & Dawn",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "37b41627-6786-8bc9-b340-21561bb0a141",
|
||||
"path": "z://MP4\\KaraFun Karaoke\\Karaoke Candida - Tony Orlando And Dawn.mp4",
|
||||
"title": "Candida"
|
||||
},
|
||||
{
|
||||
"artist": "Judas Priest",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "d51e1ee3-76b1-4b64-722f-c09b7678ed10",
|
||||
"path": "z://MP4\\KaraFun Karaoke\\Karaoke Breaking The Law - Judas Priest.mp4",
|
||||
"title": "Breaking The Law"
|
||||
},
|
||||
{
|
||||
"artist": "America",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "f92d9498-09c4-a2fa-9077-5cae27303065",
|
||||
"path": "z://MP4\\KaraFun Karaoke\\Karaoke Sister Golden Hair - America.mp4",
|
||||
"title": "Sister Golden Hair"
|
||||
},
|
||||
{
|
||||
"artist": "Gerry Guthrie",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "10272677-9a07-cdaa-0017-4fe201873ccf",
|
||||
"path": "z://MP4\\KtvEntertainment\\Gerry Guthrie - Did She Mention My Name Karaoke Lyrics.mp4",
|
||||
"title": "Did She Mention My Name"
|
||||
},
|
||||
{
|
||||
"artist": "The Allman Brothers Band",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "87bb825e-7f8c-6cef-40b1-dc2fac930e79",
|
||||
"path": "z://MP4\\KtvEntertainment\\The Allman Brothers Band - Whipping Post Karaoke Lyrics.mp4",
|
||||
"title": "Whipping Post"
|
||||
},
|
||||
{
|
||||
"artist": "The Beach Boys",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "b8d51214-0c90-d4ce-928f-b3e2cc1080c6",
|
||||
"path": "z://MP4\\ZoomKaraokeOfficial\\The Beach Boys - Sloop John B (Cool Vocal Only Ending).mp4",
|
||||
"title": "Sloop John B (Cool Vocal Only Ending)"
|
||||
},
|
||||
{
|
||||
"artist": "Floaters",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "0b7e8b42-0f80-96b0-c0c7-3f7ddf0c8150",
|
||||
"path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF103\\Floaters - Float On - SF103 - 04.mp3",
|
||||
"title": "Float On"
|
||||
},
|
||||
{
|
||||
"artist": "Afroman",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "3076eef1-f794-bd75-5d10-1e90139609d0",
|
||||
"path": "z://MP4\\KaraFun Karaoke\\Karaoke Because I Got High - Afroman.mp4",
|
||||
"title": "Because I Got High"
|
||||
},
|
||||
{
|
||||
"artist": "Lavato, Demi & Joe Jonas",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "7c1b42d7-14e4-2f6d-40ba-fa41a217c0ab",
|
||||
"path": "z://CDG\\SBI\\SBI-05\\Lavato, Demi & Joe Jonas - This Is Me.mp3",
|
||||
"title": "This Is Me"
|
||||
},
|
||||
{
|
||||
"artist": "Drake",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "2db9fd74-3353-0fb2-a289-666ad6e1020a",
|
||||
"path": "z://MP4\\King of Karaoke\\Drake - Passionfruit - King of Karaoke.mp4",
|
||||
"title": "Passionfruit"
|
||||
},
|
||||
{
|
||||
"artist": "Elvis Presley",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "b13a7a30-c5ee-a701-348f-d79a36e18b8e",
|
||||
"path": "z://CDG\\Various\\Elvis Presley - Where Do You Come From.mp3",
|
||||
"title": "Where Do You Come From"
|
||||
},
|
||||
{
|
||||
"artist": "Coldplay",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "7820a2af-e4fa-f3a2-d070-46ad73e08ace",
|
||||
"path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF308\\Coldplay - Paradise - SF308 - 11.mp3",
|
||||
"title": "Paradise"
|
||||
},
|
||||
{
|
||||
"artist": "Belinda Carlisle",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "9293d245-f376-b2c8-a268-28d8dab3161f",
|
||||
"path": "z://CDG\\Various\\Belinda Carlisle - Heaven Is A Place On Earth.mp3",
|
||||
"title": "Heaven Is A Place On Earth"
|
||||
},
|
||||
{
|
||||
"artist": "Glee",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "ee583fec-9afb-1e58-c2a4-6b01d4241cdc",
|
||||
"path": "z://CDG\\Mr Entertainer\\MRTC01\\MRTC01-02\\MRTC01-2-13 - Glee Cast - Halo+walking On Sunshine.mp3",
|
||||
"title": "Halo+walking On Sunshine"
|
||||
},
|
||||
{
|
||||
"artist": "Calvin Harris ft. Florence Welch",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "2a209afa-e7f9-ea64-c928-7f14499d9756",
|
||||
"path": "z://MP4\\ZoomKaraokeOfficial\\Calvin Harris feat Florence Welch - Sweet Nothing.mp4",
|
||||
"title": "Sweet Nothing"
|
||||
},
|
||||
{
|
||||
"artist": "Jim Croce",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "ceda7d43-a35d-1f1c-81d2-7ff6eb8c9848",
|
||||
"path": "z://MP4\\KtvEntertainment\\Jim Croce - Lover's Cross Karaoke Lyrics.mp4",
|
||||
"title": "Lover's Cross"
|
||||
},
|
||||
{
|
||||
"artist": "Herman's Hermits",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "120fb0a5-e36e-1d0f-1acd-80ecadc8ec8b",
|
||||
"path": "z://MP4\\ZoomKaraokeOfficial\\Hermans Hermits - Somethings Happening.mp4",
|
||||
"title": "Something's Happening"
|
||||
},
|
||||
{
|
||||
"artist": "The Everly Brothers",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "08ead07b-6164-e9da-b203-554de875461b",
|
||||
"path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF045\\The Everly Brothers - Price Of Love, The - SF045 - 11.mp3",
|
||||
"title": "Price Of Love, The"
|
||||
},
|
||||
{
|
||||
"artist": "Bring Me The Horizon",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "c0382fef-4a55-7cc0-54a7-12b9bf3b4198",
|
||||
"path": "z://MP4\\Sing King Karaoke\\Bring Me The Horizon - Can You Feel My Heart (Karaoke Version).mp4",
|
||||
"title": "Can You Feel My Heart"
|
||||
},
|
||||
{
|
||||
"artist": "Van Halen",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "d6ebdbb5-a61a-0fac-84d4-62f335abcaca",
|
||||
"path": "z://MP4\\CerealKillerKaraoke\\CKK-VR - Van Halen - Dirty Movies (Karaoke).mp4",
|
||||
"title": "Dirty Movies"
|
||||
},
|
||||
{
|
||||
"artist": "Chris Brown Featuring T-Pain",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "b0329fd3-dadd-bee1-8807-ae090b3a1b64",
|
||||
"path": "z://MP4\\KaraokeOnVEVO\\Chris Brown Featuring T-Pain - Kiss Kiss.mp4",
|
||||
"title": "Kiss Kiss"
|
||||
},
|
||||
{
|
||||
"artist": "Andy Griggs",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "19fca440-e6ba-b5d0-764c-395cda15ad68",
|
||||
"path": "z://CDG\\Various\\Andy Griggs - You Wont Ever Be Lonely.mp3",
|
||||
"title": "You Wont Ever Be Lonely"
|
||||
},
|
||||
{
|
||||
"artist": "Chris Norman",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "41c56192-4929-4742-77b3-9d823ebf8fe7",
|
||||
"path": "z://MP4\\ZoomKaraokeOfficial\\Chris Norman - The Girl I Love.mp4",
|
||||
"title": "The Girl I Love"
|
||||
},
|
||||
{
|
||||
"artist": "Ruth B",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "0848db87-6f59-ebdf-1cdd-00b452b40b34",
|
||||
"path": "z://MP4\\Sing King Karaoke\\Ruth B - Golden (Karaoke Version).mp4",
|
||||
"title": "Golden"
|
||||
},
|
||||
{
|
||||
"artist": "boygenius",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "1653c073-cdbb-4256-71c5-09ec5185f37d",
|
||||
"path": "z://MP4\\Sing King Karaoke\\boygenius - Emily Im Sorry.mp4",
|
||||
"title": "Emily I'm Sorry"
|
||||
},
|
||||
{
|
||||
"artist": "Blue Mink",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "4fce614f-7a59-50bd-737f-db5da614abb9",
|
||||
"path": "z://MP4\\KaraokeOnVEVO\\Blue Mink - Melting Pot.mp4",
|
||||
"title": "Melting Pot"
|
||||
},
|
||||
{
|
||||
"artist": "Dean Martin",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "3717fe66-95b4-6fec-36d5-fca62e135421",
|
||||
"path": "z://MP4\\KtvEntertainment\\Dean Martin - Non Dimenticar Karaoke Lyrics.mp4",
|
||||
"title": "Non Dimenticar"
|
||||
},
|
||||
{
|
||||
"artist": "Queen",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "c2427066-b8b8-90bf-2c8a-47a9ec7e1096",
|
||||
"path": "z://MP4\\Sing King Karaoke\\Queen - Somebody To Love (Karaoke Version).mp4",
|
||||
"title": "Somebody To Love"
|
||||
},
|
||||
{
|
||||
"artist": "Dionne Warwick",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "3c5506b7-6980-d7af-efc2-f0476d74cada",
|
||||
"path": "z://MP4\\KaraFun Karaoke\\Karaoke That s What Friends Are For - Dionne Warwick.mp4",
|
||||
"title": "That s What Friends Are For"
|
||||
},
|
||||
{
|
||||
"artist": "The Isley Brothers",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "f99facc0-a093-e0e7-6faf-f332e2067da4",
|
||||
"path": "z://MP4\\KaraFun Karaoke\\Karaoke Shout - The Isley Brothers.mp4",
|
||||
"title": "Shout"
|
||||
},
|
||||
{
|
||||
"artist": "Olivia Rodrigo",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "7b651ed4-04ed-24f4-817e-d75a308594e9",
|
||||
"path": "z://MP4\\Stingray Karaoke\\Olivia Rodrigo - get him back! (Karaoke Version).mp4",
|
||||
"title": "get him back!"
|
||||
},
|
||||
{
|
||||
"artist": "The Kentucky Headhunters",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "6eaccc40-e1bf-b1de-23c4-cdaa7aad730c",
|
||||
"path": "z://MP4\\KaraFun Karaoke\\Karaoke Spirit In The Sky - The Kentucky Headhunters.mp4",
|
||||
"title": "Spirit In The Sky"
|
||||
},
|
||||
{
|
||||
"artist": "Yebba",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "08c840ce-8b80-4856-6132-8d7bf9a357e9",
|
||||
"path": "z://MP4\\Sing King Karaoke\\Yebba - My Mind Karaoke Version).mp4",
|
||||
"title": "My Mind Karaoke Version)"
|
||||
},
|
||||
{
|
||||
"artist": "Shawn Mendes",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "9722f3a0-e17f-1df6-cebe-516a903530dc",
|
||||
"path": "z://MP4\\Sing King Karaoke\\Shawn Mendes - Where Were You In The Morning (Karaoke Version).mp4",
|
||||
"title": "Where Were You In The Morning"
|
||||
},
|
||||
{
|
||||
"artist": "Abc",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "511ea86b-d81a-257f-ffc2-6ab2453df440",
|
||||
"path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF111\\Abc - The Look Of Love - SF111 - 02.mp3",
|
||||
"title": "The Look Of Love"
|
||||
},
|
||||
{
|
||||
"artist": "Darryl Worley",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "b3422ad6-01de-00aa-84a4-e2efa46e0c98",
|
||||
"path": "z://MP4\\Let's Sing Karaoke\\Worley, Darryl - I Miss My Friend (Karaoke & Lyrics) (2).mp4",
|
||||
"title": "I Miss My Friend"
|
||||
},
|
||||
{
|
||||
"artist": "Peggy Lee",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "ff39f28a-d933-6c86-72e8-aecc4eb0a844",
|
||||
"path": "z://MP4\\KaraFun Karaoke\\Karaoke Fever - Peggy Lee.mp4",
|
||||
"title": "Fever"
|
||||
},
|
||||
{
|
||||
"artist": "Lorrie Morgan",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "a88ab5a7-ec7c-22ae-4b8a-3f4701aeff04",
|
||||
"path": "z://CDG\\Various\\Lorrie Morgan - Dear Me.mp3",
|
||||
"title": "Dear Me"
|
||||
},
|
||||
{
|
||||
"artist": "Kacey Musgraves",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "39ad7a92-fb3c-6874-85c6-5150ae52415b",
|
||||
"path": "z://MP4\\ZoomKaraokeOfficial\\Kacey Musgraves - Merry Go Round.mp4",
|
||||
"title": "Merry Go 'Round"
|
||||
},
|
||||
{
|
||||
"artist": "Rihanna ft. Mikky Ekko",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "3a3bd95f-f573-d3c2-ac2d-5b75fba98f14",
|
||||
"path": "z://CDG\\Big Hits Karaoke\\BHK039\\BH039-04 - Rihanna & Mikky Ekko - Stay.mp3",
|
||||
"title": "Stay"
|
||||
},
|
||||
{
|
||||
"artist": "Luciano Pavarotti",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "48ffd3c9-3fb9-be69-4e0e-d4974144c527",
|
||||
"path": "z://MP4\\KaraFun Karaoke\\O Sole Mio - Luciano Pavarotti Karaoke Version KaraFun.mp4",
|
||||
"title": "O Sole Mio"
|
||||
},
|
||||
{
|
||||
"artist": "David Bowie",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "10294c23-dee6-1e13-5bfc-090d5ed88c4a",
|
||||
"path": "z://MP4\\Stingray Karaoke\\Let's Dance in the Style of David Bowie karaoke video with lyrics (no lead vocal).mp4",
|
||||
"title": "Let's Dance"
|
||||
},
|
||||
{
|
||||
"artist": "Smokie",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "d2dc1db0-ae19-a8a7-42e4-f19ab155050b",
|
||||
"path": "z://MP4\\KaraFun Karaoke\\Karaoke Needles and Pins - Smokie.mp4",
|
||||
"title": "Needles and Pins"
|
||||
},
|
||||
{
|
||||
"artist": "LMFAO",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "ed6d018b-9a55-b039-8d0b-78568428f787",
|
||||
"path": "z://CDG\\SBI\\SBI-03\\SB24973 - LMFAO - Sexy And I Know It.mp3",
|
||||
"title": "Sexy And I Know It"
|
||||
},
|
||||
{
|
||||
"artist": "Elvis Presley",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "a54bec5e-5898-4b70-cb08-475ff77aa301",
|
||||
"path": "z://MP4\\Sing King Karaoke\\Elvis Presley - Always on My Mind.mp4",
|
||||
"title": "Always on My Mind"
|
||||
},
|
||||
{
|
||||
"artist": "Gabrielle",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "da575355-f72f-6934-0a34-fa54c37a8233",
|
||||
"path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF218\\Gabrielle - Stay The Same - SF218 - 12.mp3",
|
||||
"title": "Stay The Same"
|
||||
},
|
||||
{
|
||||
"artist": "Destiny's Child",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "24f3fb48-ea83-12c1-f998-cfbb917ab927",
|
||||
"path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF177\\Destiny's Child - Survivor - SF177 - 03.mp3",
|
||||
"title": "Survivor"
|
||||
},
|
||||
{
|
||||
"artist": "Sabrina Carpenter",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "795b7f81-f7a8-c3a7-4888-facd8b7f61a9",
|
||||
"path": "z://MP4\\sing2karaoke\\Sabrina Carpenter - Busy Woman.mp4",
|
||||
"title": "Busy Woman"
|
||||
},
|
||||
{
|
||||
"artist": "The Goodies",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "f24004dd-c2fa-2e3e-270b-cef6d1cfa1de",
|
||||
"path": "z://CDG\\Sunfly Collection\\Sunfly Gold\\SFGD041 - Monty Python & The Goodies\\The Goodies - Make A Daft Noise For Christmas - SFG041 - 09.mp3",
|
||||
"title": "Make A Daft Noise For Christmas"
|
||||
},
|
||||
{
|
||||
"artist": "Tony! Toni! Tone!",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "3fc77353-14b6-c1e1-a746-6547cdbbceed",
|
||||
"path": "z://MP4\\KaraokeOnVEVO\\Tony Toni Tone - Feels Good.mp4",
|
||||
"title": "Feels Good"
|
||||
},
|
||||
{
|
||||
"artist": "Bruce Springsteen",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "0aa63bcd-0d08-82d6-87a2-9f5421be75d2",
|
||||
"path": "z://CDG\\Various\\Bruce Springsteen - Cover Me.mp3",
|
||||
"title": "Cover Me"
|
||||
},
|
||||
{
|
||||
"artist": "Sing",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "e436056b-893c-d7e1-651a-2b26e1758b93",
|
||||
"path": "z://MP4\\KaraFun Karaoke\\Karaoke Let s Face the Music and Dance - Sing.mp4",
|
||||
"title": "Let s Face the Music and Dance"
|
||||
},
|
||||
{
|
||||
"artist": "Sublime",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "0e06e44b-da10-6067-f471-ea9c3b3a3e7c",
|
||||
"path": "z://MP4\\KaraFun Karaoke\\Karaoke Garden Grove - Sublime.mp4",
|
||||
"title": "Garden Grove"
|
||||
},
|
||||
{
|
||||
"artist": "Kelly Clarkson",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "karaoke",
|
||||
"guid": "bc777abe-f7da-5395-5798-a61b28ebf000",
|
||||
"path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF349\\Kelly Clarkson - Heartbeat Song - SF301 - 09.mp3",
|
||||
"title": "Heartbeat Song"
|
||||
},
|
||||
{
|
||||
"artist": "Olly Murs",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "608b9fe0-d60b-aba6-b020-9a015838fdf2",
|
||||
"path": "z://MP4\\KtvEntertainment\\Olly Murs - You Don't Know Love Karaoke Lyrics.mp4",
|
||||
"title": "You Don't Know Love"
|
||||
},
|
||||
{
|
||||
"artist": "Doechii",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "7795470f-0410-b8c4-c871-2bd31edf1a36",
|
||||
"path": "z://MP4\\Sing King Karaoke\\Doechii - Anxiety.mp4",
|
||||
"title": "Anxiety"
|
||||
},
|
||||
{
|
||||
"artist": "Florence + The Machine",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "20967d47-9855-3bed-a442-24e91b4ffdd7",
|
||||
"path": "z://MP4\\KaraFun Karaoke\\Karaoke No Light, No Light - Florence + The Machine.mp4",
|
||||
"title": "No Light, No Light"
|
||||
},
|
||||
{
|
||||
"artist": "The Beatles",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "96a11cc6-acb8-8ff7-cecb-7e8cde85e062",
|
||||
"path": "z://MP4\\KtvEntertainment\\The Beatles - P.S. I Love You (Karaoke without Vocal).mp4",
|
||||
"title": "P.S. I Love You"
|
||||
},
|
||||
{
|
||||
"artist": "Mickey Gilley",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "6f831da9-1d07-ac40-8875-16981af0b351",
|
||||
"path": "z://MP4\\KaraFun Karaoke\\Karaoke Talk To Me - Mickey Gilley.mp4",
|
||||
"title": "Talk To Me"
|
||||
},
|
||||
{
|
||||
"artist": "Sech ft. Darrell, Nicky Jam, Ozuna & Anuel AA",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "dbc38404-6328-9e9a-f82b-09b797d29e00",
|
||||
"path": "z://MP4\\Stingray Karaoke\\Otro trago Sech feat. Darrell, Nicky Jam, Ozuna & Anuel AA Karaoke with Lyrics.mp4",
|
||||
"title": "Otro trago"
|
||||
},
|
||||
{
|
||||
"artist": "Boyzone",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "c785c4a5-8b97-a40c-c163-c8be70c7ed1f",
|
||||
"path": "z://MP4\\Sing King Karaoke\\Boyzone - Baby Can I Hold You Tonight.mp4",
|
||||
"title": "Baby Can I Hold You Tonight"
|
||||
},
|
||||
{
|
||||
"artist": "Jeannie C Riley",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "3ba6129d-ac04-b405-044d-8b453f565165",
|
||||
"path": "z://MP4\\ZoomKaraokeOfficial\\Jeannie C Riley - Harper Valley PTA Karaoke Version from Zoom Karaoke.mp4",
|
||||
"title": "Harper Valley P.T.A. - Karaoke Version from Zoom Karaoke"
|
||||
},
|
||||
{
|
||||
"artist": "Razorlight",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "e28f0e1f-ea58-7f26-cbb3-40b0c3f538b2",
|
||||
"path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF248\\Razorlight - America - SF248 - 06.mp3",
|
||||
"title": "America"
|
||||
},
|
||||
{
|
||||
"artist": "Sex Pistols",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "144ff474-2a6f-b72a-5b1f-363616437767",
|
||||
"path": "z://MP4\\singsongsmusic\\God Save the Queen - Karaoke HD (In the style of Sex Pistols).mp4",
|
||||
"title": "God Save the Queen"
|
||||
},
|
||||
{
|
||||
"artist": "Dierks Bentley",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "7b438180-6926-b78f-7559-6c921a75af62",
|
||||
"path": "z://MP4\\KaraokeOnVEVO\\Dierks Bentley - Free And Easy (Down The Road I Go.mp4",
|
||||
"title": "Free And Easy (Down The Road I Go)"
|
||||
},
|
||||
{
|
||||
"artist": "Iggy Azalea",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "77170d20-a51a-1ec3-5f8d-555d299db97d",
|
||||
"path": "z://MP4\\singsongsmusic\\Team - Karaoke HD (In the style of Iggy Azalea).mp4",
|
||||
"title": "Team"
|
||||
},
|
||||
{
|
||||
"artist": "Troye Sivan",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "1c2f1901-0be9-4ab9-f012-6a91b99ae86a",
|
||||
"path": "z://MP4\\Sing King Karaoke\\Troye Sivan - WILD (Karaoke Version).mp4",
|
||||
"title": "WILD"
|
||||
},
|
||||
{
|
||||
"artist": "Adam Lambert",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "91acab7b-0884-6348-f30a-fb4d50899665",
|
||||
"path": "z://MP4\\KtvEntertainment\\Adam Lambert - Another Lonely Night Karaoke Lyrics.mp4",
|
||||
"title": "Another Lonely Night"
|
||||
},
|
||||
{
|
||||
"artist": "Big Shaq",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "karaoke",
|
||||
"guid": "f8804acb-a48f-ba25-5422-00535521a9b0",
|
||||
"path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF383\\SF383-03 - Big Shaq - Man's Not Hot.mp3",
|
||||
"title": "Man's Not Hot"
|
||||
},
|
||||
{
|
||||
"artist": "Air Supply",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "bf25b9e1-d366-5b61-aa82-cbb35bcf52a4",
|
||||
"path": "z://MP4\\Sing King Karaoke\\Air Supply - Making Love Out Of Nothing At All.mp4",
|
||||
"title": "Making Love Out Of Nothing At All"
|
||||
},
|
||||
{
|
||||
"artist": "Billy Joel",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "93c5c387-b76a-6105-19e4-cf7db9d269e3",
|
||||
"path": "z://CDG\\Various\\Billy Joel - Entertainer.mp3",
|
||||
"title": "Entertainer"
|
||||
},
|
||||
{
|
||||
"artist": "Faith Hill",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "5063126b-718f-fe62-ec89-2c701f43ccf8",
|
||||
"path": "z://CDG\\Sunfly Collection\\Sunfly Most Wanted\\SFMW 844\\Faith Hill - Where Are You Christmas -SFMW 844 -12.mp3",
|
||||
"title": "Where Are You Christmas"
|
||||
},
|
||||
{
|
||||
"artist": "Nicki Minaj Ft.drake",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "49a1a33d-799b-bbfd-1875-3c551424f75a",
|
||||
"path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF301\\Nicki Minaj Ft.drake - Moment 4 Life (clean) - SF301 - 13.mp3",
|
||||
"title": "Moment 4 Life (clean)"
|
||||
},
|
||||
{
|
||||
"artist": "P!nk",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "3c330302-071d-5a31-b34b-5cb38c12adf1",
|
||||
"path": "z://MP4\\KaraFun Karaoke\\Karaoke Long Way To Happy - Pink.mp4",
|
||||
"title": "Long Way To Happy"
|
||||
},
|
||||
{
|
||||
"artist": "Bryan Ferry",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "9ccab14f-6ce8-85f6-84eb-c2ee893e3a5d",
|
||||
"path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF135\\Bryan Ferry - Let's Stick Together - SF135 - 05.mp3",
|
||||
"title": "Let's Stick Together"
|
||||
},
|
||||
{
|
||||
"artist": "Snow Patrol",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "18b21f83-2c41-f548-3407-a891eb1885a0",
|
||||
"path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF245\\Snow Patrol - Chasing Cars - SF245 - 06.mp3",
|
||||
"title": "Chasing Cars"
|
||||
},
|
||||
{
|
||||
"artist": "Janet Jackson",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "3952c8ba-e218-ea19-be21-e42791836cb7",
|
||||
"path": "z://MP4\\KaraokeOnVEVO\\Janet Jackson - Nasty.mp4",
|
||||
"title": "Nasty"
|
||||
},
|
||||
{
|
||||
"artist": "Corby, Matt",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "2f699149-d5e9-f17b-a93e-eae193630509",
|
||||
"path": "z://CDG\\Big Hits Karaoke\\BHK028\\BHK028-01 - Corby, Matt - Brother.mp3",
|
||||
"title": "Brother"
|
||||
},
|
||||
{
|
||||
"artist": "Little Richard",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "d6934a39-5b22-598c-8817-80120e3e91c9",
|
||||
"path": "z://CDG\\Various\\Little Richard - Good Golly Miss Molly.mp3",
|
||||
"title": "Good Golly Miss Molly"
|
||||
},
|
||||
{
|
||||
"artist": "The Flower Drum Song",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "c47b99ea-e8c4-66d2-6195-8aa44b8e3896",
|
||||
"path": "z://MP4\\Let's Sing Karaoke\\Flower Drum Song, The - I Enjoy Being A Girl (Karaoke & Lyrics).mp4",
|
||||
"title": "I Enjoy Being A Girl"
|
||||
},
|
||||
{
|
||||
"artist": "Dolly Parton",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "9567a6ed-b31c-eb8d-7cdd-f99e09a29f1d",
|
||||
"path": "z://MP4\\KaraokeOnVEVO\\Dolly Parton - Here You Come Again.mp4",
|
||||
"title": "Here You Come Again"
|
||||
},
|
||||
{
|
||||
"artist": "KSI ft. Tom Grennan",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "a53bc0b1-9eff-7710-fc02-b669bf657325",
|
||||
"path": "z://MP4\\ZoomKaraokeOfficial\\KSI feat Tom Grennan - Not Over Yet.mp4",
|
||||
"title": "Not Over Yet"
|
||||
},
|
||||
{
|
||||
"artist": "The Doobie Brothers",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "0876620f-5a41-af24-42b5-965e9d767819",
|
||||
"path": "z://MP4\\Stingray Karaoke\\Minute By Minute The Doobie Brothers Karaoke with Lyrics.mp4",
|
||||
"title": "Minute By Minute"
|
||||
},
|
||||
{
|
||||
"artist": "Ariana Grande ft. The Weeknd",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "4dcd11fa-3caf-5818-f3de-5b99cf722c80",
|
||||
"path": "z://MP4\\Sing King Karaoke\\Ariana Grande & The Weeknd - Love Me Harder.mp4",
|
||||
"title": "Love Me Harder"
|
||||
}
|
||||
]
|
||||
@ -1,162 +0,0 @@
|
||||
[
|
||||
{
|
||||
"artist": "AC/DC",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "8946008c-7acc-d187-60e6-5286e55ad502",
|
||||
"path": "z://MP4\\ACDC - Shot In The Dark (Karaoke Version).mp4",
|
||||
"title": "Shot In The Dark"
|
||||
},
|
||||
{
|
||||
"artist": "Afrojack ft. Chris Brown",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "f9fd52fc-0c5a-01d5-caf5-4476e9172401",
|
||||
"path": "z://MP4\\Afrojack and Chris Brown - As Your Friend.mp4",
|
||||
"title": "As Your Friend"
|
||||
},
|
||||
{
|
||||
"artist": "Andy Grammer",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "ff27874e-68c3-2c0b-d302-a4bf36d2f76c",
|
||||
"path": "z://MP4\\Andy Grammer - Honey I Am Good.mp4",
|
||||
"title": "Honey I Am Good"
|
||||
},
|
||||
{
|
||||
"artist": "Avicii ft. Nicky Romero",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "9df89f14-a568-14a3-5081-77d6ee3d5cf8",
|
||||
"path": "z://MP4\\Avicii and Nicky Romero - I Could Be The One.mp4",
|
||||
"title": "I Could Be The One"
|
||||
},
|
||||
{
|
||||
"artist": "Bastille",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "71b6e7e2-3ad5-1b46-b4b5-d33b961451f7",
|
||||
"path": "z://MP4\\Bastille - Flaws.mp4",
|
||||
"title": "Flaws"
|
||||
},
|
||||
{
|
||||
"artist": "Big Sean ft. E40",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "3ff82151-be07-70e0-7e05-145dc0abec4a",
|
||||
"path": "z://MP4\\Big Sean and E40 - I Don t F--k With You.mp4",
|
||||
"title": "I Don t F--k With You"
|
||||
},
|
||||
{
|
||||
"artist": "Blake Shelton ft. Gwen Stefani",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "0443d7b1-daa0-6278-fe68-635a91ef7fcf",
|
||||
"path": "z://MP4\\Blake Shelton feat. Gwen Stefani - Happy Anywhere (Karaoke Version).mp4",
|
||||
"title": "Happy Anywhere"
|
||||
},
|
||||
{
|
||||
"artist": "Bradley Cooper",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "fa1a3926-96e0-3615-9785-9c24f39a937a",
|
||||
"path": "z://MP4\\Bradley Cooper - Maybe It's Time (Karaoke Instrumental) A Star Is Born.mp4",
|
||||
"title": "Maybe It's Time (A Star is Born)"
|
||||
},
|
||||
{
|
||||
"artist": "Bruno Mars ft. Cardi B",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "946a1077-ab9e-300c-3a72-b1e141e9706f",
|
||||
"path": "z://MP4\\Bruno Mars ft. Cardi B - Finesse Remix (Karaoke Version).mp4",
|
||||
"title": "Finesse Remix"
|
||||
},
|
||||
{
|
||||
"artist": "Cardi B ft. Bad Bunny, J Balvin",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "c295023e-e8db-edea-0d78-33125910fafd",
|
||||
"path": "z://MP4\\Cardi B, Bad Bunny & J Balvin - I Like It Karaoke Lyrics Instrumental Cover Full Tracks.mp4",
|
||||
"title": "I Like It"
|
||||
},
|
||||
{
|
||||
"artist": "Coldplay",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "3d0f477b-0c30-adac-57e8-155937c027ec",
|
||||
"path": "z://MP4\\Coldplay - Orphans Karaoke Instrumetal Lyrics Cover Original Key A.mp4",
|
||||
"title": "Orphans"
|
||||
},
|
||||
{
|
||||
"artist": "Juice WRLD & Marshmello",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "24bbbfd4-3ee5-0661-9f77-5a27ff024656",
|
||||
"path": "z://MP4\\Come & Go - Juice WRLD & Marshmello (Karaoke Instrumental).mp4",
|
||||
"title": "Come & Go"
|
||||
},
|
||||
{
|
||||
"artist": "Conor Maynard and Wiley",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "136d0364-c760-3faa-892c-e6553cf0d6e4",
|
||||
"path": "z://MP4\\Conor Maynard and Wiley - Animal.mp4",
|
||||
"title": "Animal"
|
||||
},
|
||||
{
|
||||
"artist": "Drake",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "125df572-009f-057c-b0d3-2403bb46519a",
|
||||
"path": "z://MP4\\Drake - Nice For What (Karaoke).mp4",
|
||||
"title": "Nice For What"
|
||||
},
|
||||
{
|
||||
"artist": "Eazy E",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "b129ac3c-45c0-dc8a-2f6b-a42c33a2f8bc",
|
||||
"path": "z://MP4\\Eazy E - Gimme That Nut.mp4",
|
||||
"title": "Gimme That Nut"
|
||||
},
|
||||
{
|
||||
"artist": "Eazy-E",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "a0a2bdc6-ddb2-16d0-5331-0251d47fc7a0",
|
||||
"path": "z://MP4\\Eazy-E - Boyz n The Hood.mp4",
|
||||
"title": "Boyz n The Hood"
|
||||
},
|
||||
{
|
||||
"artist": "Ellie Goulding",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "0693b62a-c07c-2fce-786a-463a7d85adf4",
|
||||
"path": "z://MP4\\Ellie Goulding - Love Me Like You Do.mp4",
|
||||
"title": "Love Me Like You Do"
|
||||
},
|
||||
{
|
||||
"artist": "Halsey ft. Machine Gun Kelly",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "705a1f72-ee7d-9234-6d62-8bed0309e6f9",
|
||||
"path": "z://MP4\\Forget Me Too - Halsey Machine Gun Kelly (Karaoke Version).mp4",
|
||||
"title": "Forget Me Too"
|
||||
},
|
||||
{
|
||||
"artist": "Frank Sinatra",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "7b458762-e33b-98c4-b5c3-ac60f397c075",
|
||||
"path": "z://MP4\\Frank Sinatra - New York, New York.mp4",
|
||||
"title": "New York, New York"
|
||||
},
|
||||
{
|
||||
"artist": "Garth Brooks",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "953d8eae-4be3-2b08-8695-28afaa4bd468",
|
||||
"path": "z://MP4\\Garth Brooks - Ain t Goin Down (Til The Sun Comes Up).mp4",
|
||||
"title": "Ain t Goin Down (Til The Sun Comes Up)"
|
||||
}
|
||||
]
|
||||
@ -1,50 +0,0 @@
|
||||
[
|
||||
{
|
||||
"artist": "Eazy-E",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "a0a2bdc6-ddb2-16d0-5331-0251d47fc7a0",
|
||||
"path": "z://MP4\\Eazy-E - Boyz n The Hood.mp4",
|
||||
"title": "Boyz n The Hood"
|
||||
},
|
||||
{
|
||||
"artist": "21 Savage ft. Offset w~ Metro Boomin & Travis Scott",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "4ea9624f-e7b6-4db0-839c-e9e163c452ce",
|
||||
"path": "z://MP4\\Karaoke Sing Sing\\21 Savage ft Offset w~ Metro Boomin & Travis Scott - Ghostface Killers (Karaoke Version).mp4",
|
||||
"title": "Ghostface Killers"
|
||||
},
|
||||
{
|
||||
"artist": "Afrojack ft. Chris Brown",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "f9fd52fc-0c5a-01d5-caf5-4476e9172401",
|
||||
"path": "z://MP4\\Afrojack and Chris Brown - As Your Friend.mp4",
|
||||
"title": "As Your Friend"
|
||||
},
|
||||
{
|
||||
"artist": "Andy Grammer",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "ff27874e-68c3-2c0b-d302-a4bf36d2f76c",
|
||||
"path": "z://MP4\\Andy Grammer - Honey I Am Good.mp4",
|
||||
"title": "Honey I Am Good"
|
||||
},
|
||||
{
|
||||
"artist": "Avicii ft. Nicky Romero",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "9df89f14-a568-14a3-5081-77d6ee3d5cf8",
|
||||
"path": "z://MP4\\Avicii and Nicky Romero - I Could Be The One.mp4",
|
||||
"title": "I Could Be The One"
|
||||
},
|
||||
{
|
||||
"artist": "Bastille",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "71b6e7e2-3ad5-1b46-b4b5-d33b961451f7",
|
||||
"path": "z://MP4\\Bastille - Flaws.mp4",
|
||||
"title": "Flaws"
|
||||
}
|
||||
]
|
||||
@ -1,66 +0,0 @@
|
||||
[
|
||||
{
|
||||
"artist": "Blink-182",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "49e79b7b-a610-a8b2-cee3-a4580ba5bb1e",
|
||||
"path": "z://MP4\\KaraFun Karaoke\\Karaoke I Miss You - Blink-182.mp4",
|
||||
"title": "I Miss You"
|
||||
},
|
||||
{
|
||||
"artist": "Blink-182",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "cb394951-2094-513c-c0a5-663c01bfd5ed",
|
||||
"path": "z://MP4\\KaraFun Karaoke\\Karaoke What s My Age Again - Blink-182.mp4",
|
||||
"title": "What s My Age Again"
|
||||
},
|
||||
{
|
||||
"artist": "Blink-182",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "5d09cee0-40c2-d202-7c16-cbea8a4e892a",
|
||||
"path": "z://MP4\\KaraFun Karaoke\\Karaoke First Date - Blink-182.mp4",
|
||||
"title": "First Date"
|
||||
},
|
||||
{
|
||||
"artist": "Blink-182",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "147425-unknown",
|
||||
"path": "z://MP4\\Sing King Karaoke\\blink-182 - All the Small Things.mp4",
|
||||
"title": "All the Small Things"
|
||||
},
|
||||
{
|
||||
"artist": "Blink-182",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "126671-unknown",
|
||||
"path": "z://MP4\\Let's Sing Karaoke\\Blink-182 - The Rock Show.mp4",
|
||||
"title": "The Rock Show"
|
||||
},
|
||||
{
|
||||
"artist": "Blink-182",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "307387-unknown",
|
||||
"path": "z://CDG\\SBI\\SBI-05\\Blink-182 - Reckless Abandon.mp3",
|
||||
"title": "Reckless Abandon"
|
||||
},
|
||||
{
|
||||
"artist": "Blink-182",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "18864-unknown",
|
||||
"path": "z://MP4\\KaraFun Karaoke\\Karaoke Dammit (Growing Up) - Blink-182.mp4",
|
||||
"title": "Dammit (Growing Up)"
|
||||
},
|
||||
{
|
||||
"artist": "Blink-182",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "22728-unknown",
|
||||
"path": "z://MP4\\KaraFun Karaoke\\Karaoke Feeling This - Blink-182.mp4",
|
||||
"title": "Feeling This"
|
||||
}
|
||||
]
|
||||
@ -1,260 +0,0 @@
|
||||
[
|
||||
{
|
||||
"artist": "Pitbull ft. Ne-Yo, Afrojack & Nayer",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "17685554-1e6d-09be-1888-d16cad296928",
|
||||
"path": "z://MP4\\Sing King Karaoke\\Pitbull Featuring Ne-Yo, Afrojack & Nayer - Give Me Everything.mp4",
|
||||
"title": "Give Me Everything"
|
||||
},
|
||||
{
|
||||
"artist": "Aretha Franklin ft. G. Michael",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"genre": "Karaoke",
|
||||
"guid": "c55f8a87-b921-8239-7ea5-a7a7b8d27a5b",
|
||||
"path": "z://MP4\\KaraokeOnVEVO\\Aretha Franklin & G. Michael - I Knew You Were Waiting For Me.mp4",
|
||||
"title": "I Knew You Were Waiting For Me"
|
||||
},
|
||||
{
|
||||
"artist": "P!nk",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-1",
|
||||
"path": "test/path/pink.mp4",
|
||||
"title": "Just Give Me a Reason"
|
||||
},
|
||||
{
|
||||
"artist": "Panic! At The Disco",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-2",
|
||||
"path": "test/path/panic.mp4",
|
||||
"title": "High Hopes"
|
||||
},
|
||||
{
|
||||
"artist": "3OH!3",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-3",
|
||||
"path": "test/path/3oh3.mp4",
|
||||
"title": "Don't Trust Me"
|
||||
},
|
||||
{
|
||||
"artist": "a-ha",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-4",
|
||||
"path": "test/path/aha.mp4",
|
||||
"title": "Take On Me"
|
||||
},
|
||||
{
|
||||
"artist": "Ne-Yo",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-5",
|
||||
"path": "test/path/neyo.mp4",
|
||||
"title": "So Sick"
|
||||
},
|
||||
{
|
||||
"artist": "The B-52s",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-6",
|
||||
"path": "test/path/b52s.mp4",
|
||||
"title": "Love Shack"
|
||||
},
|
||||
{
|
||||
"artist": "Salt-N-Pepa",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-7",
|
||||
"path": "test/path/saltnpepa.mp4",
|
||||
"title": "Push It"
|
||||
},
|
||||
{
|
||||
"artist": "Sir Mix-A-Lot",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-8",
|
||||
"path": "test/path/sirmixalot.mp4",
|
||||
"title": "Baby Got Back"
|
||||
},
|
||||
{
|
||||
"artist": "O-Town",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-9",
|
||||
"path": "test/path/otown.mp4",
|
||||
"title": "All or Nothing"
|
||||
},
|
||||
{
|
||||
"artist": "Jay-Z",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-10",
|
||||
"path": "test/path/jayz.mp4",
|
||||
"title": "Empire State of Mind"
|
||||
},
|
||||
{
|
||||
"artist": "G-Eazy & Halsey",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-11",
|
||||
"path": "test/path/geazy.mp4",
|
||||
"title": "Him & I"
|
||||
},
|
||||
{
|
||||
"artist": "Bachman-Turner Overdrive",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-12",
|
||||
"path": "test/path/bto.mp4",
|
||||
"title": "You Ain't Seen Nothing Yet"
|
||||
},
|
||||
{
|
||||
"artist": "Maroon 5",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-13",
|
||||
"path": "test/path/maroon5.mp4",
|
||||
"title": "Sugar"
|
||||
},
|
||||
{
|
||||
"artist": "U2",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-14",
|
||||
"path": "test/path/u2.mp4",
|
||||
"title": "With or Without You"
|
||||
},
|
||||
{
|
||||
"artist": "3 Doors Down",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-15",
|
||||
"path": "test/path/3doors.mp4",
|
||||
"title": "Kryptonite"
|
||||
},
|
||||
{
|
||||
"artist": "The Jackson 5",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-16",
|
||||
"path": "test/path/jackson5.mp4",
|
||||
"title": "I Want You Back"
|
||||
},
|
||||
{
|
||||
"artist": "The 1975",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-17",
|
||||
"path": "test/path/1975.mp4",
|
||||
"title": "Chocolate"
|
||||
},
|
||||
{
|
||||
"artist": "S Club 7",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-18",
|
||||
"path": "test/path/sclub7.mp4",
|
||||
"title": "Never Had a Dream Come True"
|
||||
},
|
||||
{
|
||||
"artist": "Sum 41",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-19",
|
||||
"path": "test/path/sum41.mp4",
|
||||
"title": "In Too Deep"
|
||||
},
|
||||
{
|
||||
"artist": "98 Degrees",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-20",
|
||||
"path": "test/path/98degrees.mp4",
|
||||
"title": "Because of You"
|
||||
},
|
||||
{
|
||||
"artist": "Shawn Mendes ft. Camila Cabello",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-21",
|
||||
"path": "test/path/shawncamila.mp4",
|
||||
"title": "Señorita"
|
||||
},
|
||||
{
|
||||
"artist": "Rihanna ft. Drake",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-22",
|
||||
"path": "test/path/rihannadrake.mp4",
|
||||
"title": "What's My Name"
|
||||
},
|
||||
{
|
||||
"artist": "Calvin Harris ft. Ellie Goulding",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-23",
|
||||
"path": "test/path/calvinellie.mp4",
|
||||
"title": "Outside"
|
||||
},
|
||||
{
|
||||
"artist": "Wiz Khalifa ft. Charlie Puth",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-24",
|
||||
"path": "test/path/wizcharlie.mp4",
|
||||
"title": "See You Again"
|
||||
},
|
||||
{
|
||||
"artist": "The Weeknd ft. Daft Punk",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-25",
|
||||
"path": "test/path/weeknddaftpunk.mp4",
|
||||
"title": "Starboy"
|
||||
},
|
||||
{
|
||||
"artist": "MAGIC!",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-26",
|
||||
"path": "test/path/magic.mp4",
|
||||
"title": "Rude"
|
||||
},
|
||||
{
|
||||
"artist": "Tony! Toni! Tone!",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-27",
|
||||
"path": "test/path/tonytonitone.mp4",
|
||||
"title": "If I Had No Loot"
|
||||
},
|
||||
{
|
||||
"artist": "Little Mix",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-28",
|
||||
"path": "test/path/littlemix.mp4",
|
||||
"title": "Black Magic"
|
||||
},
|
||||
{
|
||||
"artist": "Anne-Marie",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-29",
|
||||
"path": "test/path/annemarie.mp4",
|
||||
"title": "2002"
|
||||
},
|
||||
{
|
||||
"artist": "Sophie Ellis-Bextor",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-30",
|
||||
"path": "test/path/sophieellis.mp4",
|
||||
"title": "Murder on the Dancefloor"
|
||||
}
|
||||
]
|
||||
@ -1,34 +0,0 @@
|
||||
[
|
||||
{
|
||||
"artist": "The Beatles",
|
||||
"title": "Hey Jude",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-guid-1",
|
||||
"path": "test/path/1"
|
||||
},
|
||||
{
|
||||
"artist": "Queen",
|
||||
"title": "Bohemian Rhapsody",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-guid-2",
|
||||
"path": "test/path/2"
|
||||
},
|
||||
{
|
||||
"artist": "Pink Floyd",
|
||||
"title": "Another Brick in the Wall",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-guid-3",
|
||||
"path": "test/path/3"
|
||||
},
|
||||
{
|
||||
"artist": "Coldplay",
|
||||
"title": "Yellow",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "test-guid-4",
|
||||
"path": "test/path/4"
|
||||
}
|
||||
]
|
||||
@ -1,42 +0,0 @@
|
||||
[
|
||||
{
|
||||
"artist": "Lavato, Demi & Joe Jonas",
|
||||
"title": "This Is Me",
|
||||
"guid": "7c1b42d7-14e4-2f6d-40ba-fa41a217c0ab"
|
||||
},
|
||||
{
|
||||
"artist": "Drake",
|
||||
"title": "Passionfruit",
|
||||
"guid": "2db9fd74-3353-0fb2-a289-666ad6e1020a"
|
||||
},
|
||||
{
|
||||
"artist": "Coldplay",
|
||||
"title": "Paradise",
|
||||
"guid": "7820a2af-e4fa-f3a2-d070-46ad73e08ace"
|
||||
},
|
||||
{
|
||||
"artist": "Chris Brown Featuring T-Pain",
|
||||
"title": "Kiss Kiss",
|
||||
"guid": "b0329fd3-dadd-bee1-8807-ae090b3a1b64"
|
||||
},
|
||||
{
|
||||
"artist": "Destiny's Child",
|
||||
"title": "Survivor",
|
||||
"guid": "24f3fb48-ea83-12c1-f998-cfbb917ab927"
|
||||
},
|
||||
{
|
||||
"artist": "Nicki Minaj Ft.drake",
|
||||
"title": "Moment 4 Life (clean)",
|
||||
"guid": "49a1a33d-799b-bbfd-1875-3c551424f75a"
|
||||
},
|
||||
{
|
||||
"artist": "Corby, Matt",
|
||||
"title": "Brother",
|
||||
"guid": "2f699149-d5e9-f17b-a93e-eae193630509"
|
||||
},
|
||||
{
|
||||
"artist": "The Flower Drum Song",
|
||||
"title": "I Enjoy Being A Girl",
|
||||
"guid": "c47b99ea-e8c4-66d2-6195-8aa44b8e3896"
|
||||
}
|
||||
]
|
||||
@ -1,26 +0,0 @@
|
||||
[
|
||||
{
|
||||
"artist": "ACDC",
|
||||
"title": "Shot In The Dark",
|
||||
"disabled": false,
|
||||
"favorite": true,
|
||||
"guid": "8946008c-7acc-d187-60e6-5286e55ad502",
|
||||
"path": "z://MP4\\ACDC - Shot In The Dark (Karaoke Version).mp4"
|
||||
},
|
||||
{
|
||||
"artist": "Bruno Mars ft. Cardi B",
|
||||
"title": "Finesse Remix",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "946a1077-ab9e-300c-3a72-b1e141e9706f",
|
||||
"path": "z://MP4\\Bruno Mars ft. Cardi B - Finesse Remix (Karaoke Version).mp4"
|
||||
},
|
||||
{
|
||||
"artist": "Taylor Swift",
|
||||
"title": "Love Story",
|
||||
"disabled": false,
|
||||
"favorite": true,
|
||||
"guid": "d783e6c5-761f-4fc3-bfcf-6089cdfc8f96",
|
||||
"path": "z://MP4\\Taylor Swift - Love Story.mp4"
|
||||
}
|
||||
]
|
||||
19
restart_services.sh
Executable file
19
restart_services.sh
Executable file
@ -0,0 +1,19 @@
|
||||
#!/bin/bash
#
# Quick restart script for after Mac reboots.
# This assumes Docker Desktop is already running.
#
# The script changes into ../musicbrainz-docker (relative to the directory it
# is invoked from) and brings the compose services up in detached mode with
# the web server port set to 5001.
#
# Exit status: 0 when the services were started, non-zero when the directory
# change or docker-compose failed.

# Fail fast on errors, unset variables and failed pipeline stages.
set -euo pipefail

echo "🔄 Restarting MusicBrainz services..."

# Navigate to musicbrainz-docker. Abort if that fails, otherwise docker-compose
# would act on whatever compose project lives in the current directory.
cd ../musicbrainz-docker || {
  echo "❌ Could not change directory to ../musicbrainz-docker" >&2
  exit 1
}

# Start services. Only report success when docker-compose actually succeeded.
if ! MUSICBRAINZ_WEB_SERVER_PORT=5001 docker-compose up -d; then
  echo "❌ docker-compose up failed" >&2
  exit 1
fi

echo "✅ Services started!"
echo "⏳ Database may take 5-10 minutes to fully initialize"
echo ""
echo "📊 Check status: docker-compose ps"
echo "📋 View logs: docker-compose logs -f db"
echo "🧪 Test when ready: cd ../musicbrainz-cleaner && docker-compose run --rm musicbrainz-cleaner python3 quick_test_20.py"
|
||||
@ -1,19 +1,20 @@
|
||||
"""
|
||||
Legacy HTTP API client for MusicBrainz Data Cleaner.
|
||||
Used as fallback when direct database access is not available.
|
||||
HTTP API client for MusicBrainz Data Cleaner.
|
||||
Implements the MusicBrainzDataProvider interface for API-based access.
|
||||
"""
|
||||
|
||||
import requests
|
||||
import time
|
||||
from typing import Dict, Optional, Any
|
||||
from typing import Dict, Optional, Any, Tuple
|
||||
from ..config.constants import (
|
||||
DEFAULT_MUSICBRAINZ_URL, API_REQUEST_DELAY, REQUEST_TIMEOUT,
|
||||
SUCCESS_MESSAGES, ERROR_MESSAGES
|
||||
)
|
||||
from ..core.interfaces import MusicBrainzDataProvider
|
||||
|
||||
|
||||
class MusicBrainzAPIClient:
|
||||
"""Legacy HTTP API client for MusicBrainz (fallback option)."""
|
||||
class MusicBrainzAPIClient(MusicBrainzDataProvider):
|
||||
"""HTTP API client for MusicBrainz implementing the data provider interface."""
|
||||
|
||||
def __init__(self, base_url: str = DEFAULT_MUSICBRAINZ_URL):
|
||||
self.base_url = base_url
|
||||
@ -30,8 +31,8 @@ class MusicBrainzAPIClient:
|
||||
print(f"API connection test failed: {e}")
|
||||
return False
|
||||
|
||||
def search_artist(self, artist_name: str) -> Optional[Dict[str, Any]]:
|
||||
"""Search for artist by name using API."""
|
||||
def fuzzy_search_artist(self, artist_name: str) -> Optional[Tuple[str, str, float]]:
|
||||
"""Search for artist by name using API. Returns (artist_name, mbid, score)."""
|
||||
try:
|
||||
url = f"{self.base_url}/ws/2/artist/?query=name:{artist_name}&fmt=json"
|
||||
response = self.session.get(url)
|
||||
@ -39,14 +40,15 @@ class MusicBrainzAPIClient:
|
||||
data = response.json()
|
||||
|
||||
if data.get('artists') and len(data['artists']) > 0:
|
||||
return data['artists'][0]
|
||||
artist = data['artists'][0]
|
||||
return (artist['name'], artist['id'], 1.0) # Perfect match for API
|
||||
return None
|
||||
except Exception as e:
|
||||
print(f"API search failed for artist '{artist_name}': {e}")
|
||||
return None
|
||||
|
||||
def search_recording(self, title: str, artist_mbid: Optional[str] = None) -> Optional[Dict[str, Any]]:
|
||||
"""Search for recording by title and optionally artist using API."""
|
||||
def fuzzy_search_recording(self, title: str, artist_mbid: Optional[str] = None) -> Optional[Tuple[str, str, float]]:
|
||||
"""Search for recording by title and optionally artist using API. Returns (recording_name, mbid, score)."""
|
||||
try:
|
||||
if artist_mbid:
|
||||
url = f"{self.base_url}/ws/2/recording/?query=arid:{artist_mbid}%20AND%20name:{title}&fmt=json"
|
||||
@ -58,7 +60,8 @@ class MusicBrainzAPIClient:
|
||||
data = response.json()
|
||||
|
||||
if data.get('recordings') and len(data['recordings']) > 0:
|
||||
return data['recordings'][0]
|
||||
recording = data['recordings'][0]
|
||||
return (recording['title'], recording['id'], 1.0) # Perfect match for API
|
||||
return None
|
||||
except Exception as e:
|
||||
print(f"API search failed for recording '{title}': {e}")
|
||||
|
||||
@ -13,9 +13,11 @@ from ..config.constants import (
|
||||
TITLE_SIMILARITY_THRESHOLD, ARTIST_SIMILARITY_THRESHOLD,
|
||||
SUCCESS_MESSAGES, ERROR_MESSAGES
|
||||
)
|
||||
from ..utils.artist_title_processing import parse_complex_collaboration, parse_collaborators, generate_title_variations
|
||||
from ..core.interfaces import MusicBrainzDataProvider
|
||||
|
||||
|
||||
class MusicBrainzDatabase:
|
||||
class MusicBrainzDatabase(MusicBrainzDataProvider):
|
||||
"""Direct PostgreSQL database access for MusicBrainz with fuzzy search."""
|
||||
|
||||
def __init__(self, host: str = DB_HOST, port: int = DB_PORT,
|
||||
@ -34,7 +36,7 @@ class MusicBrainzDatabase:
|
||||
try:
|
||||
# Use the direct connection method that works
|
||||
self.connection = psycopg2.connect(
|
||||
host='172.18.0.2', # Docker container IP that works
|
||||
host='db', # Use Docker service name
|
||||
port=self.port,
|
||||
database=self.database,
|
||||
user=self.user,
|
||||
@ -167,6 +169,32 @@ class MusicBrainzDatabase:
|
||||
))
|
||||
return (result['name'], result['gid'], 0.98)
|
||||
|
||||
# Try matching with "&" vs "and" variations
|
||||
if '&' in main_artist:
|
||||
and_variant = main_artist.replace('&', 'and')
|
||||
query = "SELECT name, gid FROM artist WHERE name = %s LIMIT 1"
|
||||
self.cursor.execute(query, (and_variant,))
|
||||
result = self.cursor.fetchone()
|
||||
if result:
|
||||
print(SUCCESS_MESSAGES['fuzzy_match_found'].format(
|
||||
original=artist_name,
|
||||
matched=result['name'],
|
||||
score=0.97
|
||||
))
|
||||
return (result['name'], result['gid'], 0.97)
|
||||
elif ' and ' in main_artist.lower():
|
||||
ampersand_variant = main_artist.replace(' and ', ' & ')
|
||||
query = "SELECT name, gid FROM artist WHERE name = %s LIMIT 1"
|
||||
self.cursor.execute(query, (ampersand_variant,))
|
||||
result = self.cursor.fetchone()
|
||||
if result:
|
||||
print(SUCCESS_MESSAGES['fuzzy_match_found'].format(
|
||||
original=artist_name,
|
||||
matched=result['name'],
|
||||
score=0.97
|
||||
))
|
||||
return (result['name'], result['gid'], 0.97)
|
||||
|
||||
# Try fuzzy search
|
||||
# Use more specific patterns to avoid false matches
|
||||
# Also try different dash characters and name variations
|
||||
@ -322,7 +350,7 @@ class MusicBrainzDatabase:
|
||||
best_recording_count = artist['recording_count']
|
||||
print(f" 🎯 New best match: {artist['name']} (score: {score}, recordings: {artist['recording_count']})")
|
||||
|
||||
if best_score >= 80:
|
||||
if best_score >= 70: # Lower threshold for better matching
|
||||
print(SUCCESS_MESSAGES['fuzzy_match_found'].format(
|
||||
original=artist_name,
|
||||
matched=best_match['name'],
|
||||
@ -345,29 +373,8 @@ class MusicBrainzDatabase:
|
||||
return None
|
||||
|
||||
try:
|
||||
# Try with full title first, then with parenthetical content removed
|
||||
search_titles = [title.strip()]
|
||||
|
||||
# Create version with parenthetical content removed
|
||||
import re
|
||||
# Remove all parenthetical content like (clean), (remix), (karaoke), etc.
|
||||
clean_title = re.sub(r'\s*\([^)]*\)', '', title.strip())
|
||||
clean_title = clean_title.strip()
|
||||
if clean_title != title.strip():
|
||||
search_titles.append(clean_title)
|
||||
|
||||
# Also try with specific karaoke patterns removed
|
||||
karaoke_patterns = [
|
||||
r'\s*\(Karaoke Version\)',
|
||||
r'\s*\(Karaoke\)',
|
||||
r'\s*\(Instrumental\)',
|
||||
r'\s*\(Backing Track\)',
|
||||
]
|
||||
for pattern in karaoke_patterns:
|
||||
karaoke_clean = re.sub(pattern, '', title.strip(), flags=re.IGNORECASE)
|
||||
karaoke_clean = karaoke_clean.strip()
|
||||
if karaoke_clean != title.strip() and karaoke_clean not in search_titles:
|
||||
search_titles.append(karaoke_clean)
|
||||
# Generate title variations using the helper method
|
||||
search_titles = generate_title_variations(title)
|
||||
|
||||
if artist_mbid:
|
||||
# Try to find recording by artist and title with all variations
|
||||
@ -408,21 +415,22 @@ class MusicBrainzDatabase:
|
||||
best_score = score
|
||||
best_match = recording
|
||||
|
||||
if best_score >= 80:
|
||||
if best_score >= 70: # Lower threshold for better matching
|
||||
return (best_match['name'], best_match['gid'], best_score / 100.0)
|
||||
else:
|
||||
# No artist constraint - search by title only with all variations
|
||||
for search_title in search_titles:
|
||||
# Try exact match first
|
||||
query = "SELECT name, gid FROM recording WHERE name = %s LIMIT 1"
|
||||
self.cursor.execute(query, (search_title,))
|
||||
# Try exact match first (handle both apostrophe types)
|
||||
search_title_curly = search_title.replace("'", "'")
|
||||
query = "SELECT name, gid FROM recording WHERE name = %s OR name = %s LIMIT 1"
|
||||
self.cursor.execute(query, (search_title, search_title_curly))
|
||||
result = self.cursor.fetchone()
|
||||
if result:
|
||||
return (result['name'], result['gid'], 1.0)
|
||||
|
||||
# Try case-insensitive match
|
||||
query = "SELECT name, gid FROM recording WHERE LOWER(name) = LOWER(%s) LIMIT 1"
|
||||
self.cursor.execute(query, (search_title,))
|
||||
# Try case-insensitive match (handle both apostrophe types)
|
||||
query = "SELECT name, gid FROM recording WHERE LOWER(name) = LOWER(%s) OR LOWER(name) = LOWER(%s) LIMIT 1"
|
||||
self.cursor.execute(query, (search_title, search_title_curly))
|
||||
result = self.cursor.fetchone()
|
||||
if result:
|
||||
return (result['name'], result['gid'], 0.99)
|
||||
@ -432,8 +440,10 @@ class MusicBrainzDatabase:
|
||||
best_score = 0
|
||||
|
||||
for search_title in search_titles:
|
||||
query = "SELECT name, gid FROM recording WHERE name ILIKE %s LIMIT 50"
|
||||
self.cursor.execute(query, (f"%{search_title}%",))
|
||||
# Handle both apostrophe types in partial search
|
||||
search_title_curly = search_title.replace("'", "'")
|
||||
query = "SELECT name, gid FROM recording WHERE name ILIKE %s OR name ILIKE %s LIMIT 50"
|
||||
self.cursor.execute(query, (f"%{search_title}%", f"%{search_title_curly}%"))
|
||||
recordings = self.cursor.fetchall()
|
||||
|
||||
for recording in recordings:
|
||||
@ -442,7 +452,7 @@ class MusicBrainzDatabase:
|
||||
best_score = score
|
||||
best_match = recording
|
||||
|
||||
if best_score >= 80:
|
||||
if best_score >= 70: # Lower threshold for better matching
|
||||
return (best_match['name'], best_match['gid'], best_score / 100.0)
|
||||
|
||||
return None
|
||||
@ -451,6 +461,399 @@ class MusicBrainzDatabase:
|
||||
print(f"Error in fuzzy_search_recording: {e}")
|
||||
return None
|
||||
|
||||
def fuzzy_search_artist_with_recording(self, artist_name: str, recording_title: str) -> Optional[Tuple[str, str, float]]:
    """
    Fuzzy search for artist by name, prioritizing artists that have the specific recording.

    Args:
        artist_name: Artist string as found in the input data; anything after a
            "ft." / "feat." marker is ignored for the lookup.
        recording_title: Title used to check which candidate artists are credited
            on a recording whose name contains it (case-insensitive).

    Returns:
        (artist_name, mbid, similarity_score) for the best candidate, or None when
        there is no connection, no candidate, or a database error occurs.
    """
    if not self.connection:
        return None

    try:
        main_artist = self._extract_main_artist(artist_name)
        candidates = self._collect_artist_candidates(main_artist)
        if not candidates:
            return None

        print(f"🔍 Found {len(candidates)} potential artists for '{main_artist}', checking for recording '{recording_title}'...")

        for artist in candidates:
            # An empty title still runs the check (pattern '%%'), as before.
            self._attach_recording_stats(artist, recording_title or '')

        candidates.sort(key=self._candidate_sort_key)

        best_artist = candidates[0]
        print(f" 🎯 Selected: {best_artist['name']} (score: {best_artist['score']:.2f}, has recording: {best_artist['has_recording']})")

        print(SUCCESS_MESSAGES['fuzzy_match_found'].format(
            original=artist_name,
            matched=best_artist['name'],
            score=best_artist['score']
        ))
        return (best_artist['name'], best_artist['gid'], best_artist['score'])

    except Exception as e:
        print(f"Error in fuzzy_search_artist_with_recording: {e}")
        return None

def fuzzy_search_artist_candidates(self, artist_name: str, recording_title: Optional[str] = None) -> List[Dict[str, Any]]:
    """
    Fuzzy search for artist by name, returning multiple candidates.

    If recording_title is provided, prioritizes artists that have the specific recording.

    Returns:
        List of candidate dicts with keys 'name', 'gid', 'match_type', 'score',
        'total_recordings' and (only when recording_title is given)
        'has_recording', best candidate first. Empty list when there is no
        connection, no candidate, or a database error occurs.
    """
    if not self.connection:
        return []

    try:
        main_artist = self._extract_main_artist(artist_name)
        candidates = self._collect_artist_candidates(main_artist)
        if not candidates:
            return []

        if recording_title:
            print(f"🔍 Found {len(candidates)} potential artists for '{main_artist}', checking for recording '{recording_title}'...")
            for artist in candidates:
                self._attach_recording_stats(artist, recording_title)
        else:
            # No title to check: only the total counts are needed for sorting.
            for artist in candidates:
                self._attach_recording_stats(artist, None)

        # Without 'has_recording' the first key component is constant, so this
        # degrades to sorting by score and then by total recordings.
        candidates.sort(key=self._candidate_sort_key)
        return candidates

    except Exception as e:
        print(f"Error in fuzzy_search_artist_candidates: {e}")
        return []

def _extract_main_artist(self, artist_name: str) -> str:
    """Return the part of artist_name before the first "ft." / "feat." marker."""
    import re

    # Case-insensitive and anchored on a word start: "Ft." / "FEAT." are split
    # too, while names that merely contain "ft." inside a word are not.
    return re.split(r'\b(?:ft|feat)\.', artist_name.strip(), maxsplit=1, flags=re.IGNORECASE)[0].strip()

def _like_contains(self, text: str) -> str:
    """Build a "contains" LIKE/ILIKE pattern that matches text literally."""
    # Backslash is PostgreSQL's default LIKE escape character; escape it first.
    escaped = str(text).replace('\\', '\\\\').replace('%', '\\%').replace('_', '\\_')
    return f'%{escaped}%'

def _candidate_sort_key(self, artist: Dict[str, Any]) -> Tuple[bool, float, int]:
    """Sort key: artists with the recording first, then by score, then by total recordings."""
    return (
        not artist.get('has_recording', False),
        -artist['score'],
        -artist.get('total_recordings', 0),
    )

def _collect_artist_candidates(self, main_artist: str) -> List[Dict[str, Any]]:
    """
    Collect de-duplicated artist candidates for main_artist.

    Lookup order (first match for a given MBID wins): exact name (1.0),
    exact alias (0.95), case-insensitive name (0.99), then substring matches
    on names and aliases scored with fuzz.ratio and kept when >= 0.8.
    """
    if not main_artist:
        # An empty name would turn the substring queries into '%%' and pull
        # the whole artist table.
        return []

    candidates: List[Dict[str, Any]] = []
    seen_gids = set()

    def add_rows(rows, match_type, fixed_score=None):
        for row in rows:
            gid = row['gid']
            if gid in seen_gids:
                continue
            if fixed_score is None:
                # NOTE(review): alias matches are scored against the canonical
                # artist name (the query selects a.name), not the alias text
                # that matched; confirm this is intended.
                score = fuzz.ratio(main_artist.lower(), row['name'].lower()) / 100.0
                if score < 0.8:  # Only include good matches
                    continue
            else:
                score = fixed_score
            seen_gids.add(gid)
            candidates.append({
                'name': row['name'],
                'gid': gid,
                'match_type': match_type,
                'score': score
            })

    # 1. Exact matches in artist names
    self.cursor.execute("SELECT name, gid FROM artist WHERE name = %s", (main_artist,))
    add_rows(self.cursor.fetchall(), 'exact_name', 1.0)

    # 2. Exact matches in artist aliases
    self.cursor.execute(
        """
        SELECT a.name, a.gid
        FROM artist a
        JOIN artist_alias aa ON a.id = aa.artist
        WHERE aa.name = %s
        """,
        (main_artist,)
    )
    add_rows(self.cursor.fetchall(), 'exact_alias', 0.95)

    # 3. Case-insensitive matches
    self.cursor.execute("SELECT name, gid FROM artist WHERE LOWER(name) = LOWER(%s)", (main_artist,))
    add_rows(self.cursor.fetchall(), 'case_insensitive', 0.99)

    # 4. Fuzzy matches in names and aliases
    # Dash variants: ASCII hyphen-minus <-> typographic hyphen (presumably
    # U+2010, the form MusicBrainz uses in names; verify against the data).
    name_variants = [
        main_artist,
        main_artist.replace('-', '‐'),
        main_artist.replace('‐', '-'),
    ]
    # Name variations without a trailing number (e.g., "S Club 7" -> "S Club")
    for variant in list(name_variants):
        if ' ' in variant:
            parts = variant.split()
            if len(parts) > 1 and parts[-1].isdigit():
                name_variants.append(' '.join(parts[:-1]))
    # Drop repeated variants (names without dashes yield three identical
    # entries) so the same queries are not executed several times.
    name_variants = list(dict.fromkeys(name_variants))

    for variant in name_variants:
        pattern = self._like_contains(variant)

        self.cursor.execute("SELECT name, gid FROM artist WHERE name ILIKE %s", (pattern,))
        add_rows(self.cursor.fetchall(), 'fuzzy_name')

        self.cursor.execute(
            """
            SELECT a.name, a.gid
            FROM artist a
            JOIN artist_alias aa ON a.id = aa.artist
            WHERE aa.name ILIKE %s
            """,
            (pattern,)
        )
        add_rows(self.cursor.fetchall(), 'fuzzy_alias')

    return candidates

def _count_artist_recordings(self, gid: str, recording_title: Optional[str] = None) -> int:
    """Count recordings credited to the artist, optionally only those whose name contains recording_title."""
    query = """
        SELECT COUNT(r.id) as recording_count
        FROM artist a
        JOIN artist_credit_name acn ON a.id = acn.artist
        JOIN artist_credit ac ON acn.artist_credit = ac.id
        JOIN recording r ON ac.id = r.artist_credit
        WHERE a.gid = %s
    """
    params = [gid]
    if recording_title is not None:
        query += " AND r.name ILIKE %s"
        params.append(self._like_contains(recording_title))
    self.cursor.execute(query, tuple(params))
    row = self.cursor.fetchone()
    return row['recording_count'] if row else 0

def _attach_recording_stats(self, artist: Dict[str, Any], recording_title: Optional[str]) -> None:
    """
    Add 'total_recordings' to artist in place; when recording_title is not None
    also add 'has_recording' and print the per-artist status line.
    """
    gid = artist['gid']
    if recording_title is not None:
        artist['has_recording'] = self._count_artist_recordings(gid, recording_title) > 0
    artist['total_recordings'] = self._count_artist_recordings(gid)

    if recording_title is not None:
        status = "✅ HAS RECORDING" if artist['has_recording'] else "❌ NO RECORDING"
        print(f" 📊 {artist['name']} ({str(gid)[:8]}...): {artist['total_recordings']} recordings - {status}")
|
||||
|
||||
def get_artist_info(self, mbid: str) -> Optional[Dict[str, Any]]:
|
||||
"""Get detailed artist information by MBID."""
|
||||
if not self.connection:
|
||||
@ -532,201 +935,4 @@ class MusicBrainzDatabase:
|
||||
print(f"Database connection test failed: {e}")
|
||||
return False
|
||||
|
||||
def find_artist_credit(self, artist_name: str, title: str) -> Optional[Tuple[str, str, str]]:
    """
    Find artist credit for collaboration.

    The main artist (as split off by _parse_complex_collaboration) is matched
    exactly; the recording is matched by a case-insensitive "contains" search,
    trying the full title first and then variants with parenthetical content
    such as "(Karaoke Version)" removed.

    Returns (artist_credit_id, artist_string, recording_mbid) or None.
    """
    if not self.connection:
        return None

    try:
        import re

        # Only the main artist is needed for the lookup; collaborators are
        # recovered from the matched artist credit below.
        main_artist, _collaborators = self._parse_complex_collaboration(artist_name.strip())

        full_title = title.strip()

        # Title variants, most specific first.
        variants = [
            full_title,
            # All parenthetical content removed: (clean), (remix), (karaoke), ...
            re.sub(r'\s*\([^)]*\)', '', full_title).strip(),
        ]
        karaoke_patterns = [
            r'\s*\(Karaoke Version\)',
            r'\s*\(Karaoke\)',
            r'\s*\(Instrumental\)',
            r'\s*\(Backing Track\)',
        ]
        for pattern in karaoke_patterns:
            variants.append(re.sub(pattern, '', full_title, flags=re.IGNORECASE).strip())

        # Drop duplicates and, importantly, empty variants: a title that is
        # only "(Karaoke)" reduces to "" and would be searched as '%%', which
        # matches every recording by the artist.
        search_titles = []
        for variant in variants:
            if variant and variant not in search_titles:
                search_titles.append(variant)

        def like_contains(text):
            # Escape LIKE wildcards (backslash is PostgreSQL's default escape
            # character) so titles containing % or _ are matched literally.
            escaped = text.replace('\\', '\\\\').replace('%', '\\%').replace('_', '\\_')
            return f"%{escaped}%"

        query = """
            SELECT ac.id as artist_credit_id, r.gid as recording_mbid
            FROM recording r
            JOIN artist_credit ac ON r.artist_credit = ac.id
            JOIN artist_credit_name acn ON ac.id = acn.artist_credit
            JOIN artist a ON acn.artist = a.id
            WHERE a.name = %s AND r.name ILIKE %s
            LIMIT 1
        """

        # Try to find recording with artist credit using all title variations
        result = None
        for search_title in search_titles:
            self.cursor.execute(query, (main_artist, like_contains(search_title)))
            result = self.cursor.fetchone()
            if result:
                break

        if not result:
            return None

        # Get the full artist string for this credit
        artist_query = """
            SELECT a.name, acn.join_phrase, acn.position
            FROM artist_credit_name acn
            JOIN artist a ON acn.artist = a.id
            WHERE acn.artist_credit = %s
            ORDER BY acn.position
        """
        self.cursor.execute(artist_query, (result['artist_credit_id'],))
        artists = self.cursor.fetchall()

        # Each name is followed by its join phrase (e.g. " feat. ", " & ").
        artist_string = ''.join(
            artist['name'] + (artist['join_phrase'] or '') for artist in artists
        )

        return (str(result['artist_credit_id']), artist_string, result['recording_mbid'])

    except Exception as e:
        print(f"Error in find_artist_credit: {e}")
        return None
|
||||
|
||||
def _parse_complex_collaboration(self, artist_string: str) -> Tuple[str, List[str]]:
    """
    Parse complex collaboration strings like "Pitbull ft. Ne-Yo, Afrojack & Nayer"

    Primary markers ("ft.", "feat.", "featuring", any case) always indicate a
    collaboration. Secondary separators ("&", the word "and") are only treated
    as a collaboration when they occur exactly once and the string is not one
    of a few well-known duo names.

    Returns: (main_artist, [collaborators]); ([artist_string], []) shaped as
    (artist_string, []) when no collaboration is detected.
    """
    import re

    # Primary collaboration indicators. The look-behind keeps "ft." from
    # matching inside a word; the earliest marker in the string wins.
    primary_re = re.compile(r'(?<!\w)(?:featuring\b|feat\.|ft\.)', re.IGNORECASE)
    marker = primary_re.search(artist_string)
    if marker:
        main_artist = artist_string[:marker.start()].strip()
        # Further markers in the remainder just separate more collaborators.
        remainder = primary_re.sub(',', artist_string[marker.end():]).strip()
        return (main_artist, self._parse_collaborators(remainder))

    # Secondary collaboration indicators (need more careful handling).
    # "&" gets no word boundaries: "\b&\b" cannot match " & " because there is
    # no word boundary between a space and "&". "and" keeps them so names such
    # as "Orlando" or "Brandy" are not split.
    obvious_band_names = {'simon & garfunkel', 'hall & oates', 'brooks & dunn'}
    for separator_re in (r'&', r'\band\b'):
        parts = re.split(separator_re, artist_string, flags=re.IGNORECASE)
        if len(parts) != 2:
            continue

        part1 = parts[0].strip()
        part2 = parts[1].strip()
        if not part1 or not part2:
            # Dangling separator ("& Friends"): nothing to split off.
            continue

        # A comma on either side means a list of artists, i.e. a collaboration.
        has_comma = ',' in part1 or ',' in part2

        # Only very obvious one-word/one-word duo names are kept intact; the
        # comparison is normalised so "Hall and Oates" matches "hall & oates".
        if (not has_comma
                and len(part1.split()) == 1 and len(part2.split()) == 1
                and f'{part1} & {part2}'.lower() in obvious_band_names):
            continue

        # Split at the position the regex matched instead of str.split() on the
        # raw pattern, which was case-sensitive and split inside words.
        return (part1, self._parse_collaborators(part2))

    return (artist_string, [])
|
||||
|
||||
def _parse_collaborators(self, collaborators_string: str) -> List[str]:
    """
    Split a collaborator list such as "Ne-Yo, Afrojack & Nayer" into the
    individual artist names.

    Commas, ampersands and the standalone word "and" (any case) act as
    separators. Surrounding whitespace is stripped and empty entries are
    dropped; an empty or missing input yields an empty list.
    """
    import re

    if not collaborators_string:
        return []

    # "and" is wrapped in word boundaries so names like "Orlando" stay intact.
    pieces = re.split(r'[,&]|\b(?:and)\b', collaborators_string, flags=re.IGNORECASE)
    trimmed = (piece.strip() for piece in pieces)
    return [name for name in trimmed if name]
|
||||
|
||||
164
src/cli/artist_lookup_cli.py
Normal file
164
src/cli/artist_lookup_cli.py
Normal file
@ -0,0 +1,164 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Command-line interface for managing artist lookup table.
|
||||
Allows adding new artist variations and searching existing ones.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
# Make the project importable when this file is executed directly.
# The imports below are absolute ("src.utils..."), so the project ROOT (the
# directory that contains "src") has to be on sys.path; adding only "<root>/src"
# does not make the "src" package resolvable. The "<root>/src" entry is kept as
# well in case other code relies on it.
_PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
for _entry in (str(_PROJECT_ROOT / "src"), str(_PROJECT_ROOT)):
    if _entry not in sys.path:
        sys.path.insert(0, _entry)
|
||||
|
||||
from src.utils.artist_lookup import artist_lookup, add_artist_fallback
|
||||
|
||||
|
||||
def main():
    """
    Main CLI function for artist lookup table management.

    Parses sys.argv and dispatches to one of the subcommands:
    ``search <query> [--min-score N]``, ``add --canonical-name ... --mbid ...
    --variations ... [--notes ...]``, ``list`` and ``stats``. Prints the help
    text and returns when no subcommand is given.
    """
    parser = argparse.ArgumentParser(
        description="MusicBrainz Data Cleaner - Artist Lookup Table Manager",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # The examples use the subcommand syntax defined below; the parser has
        # no --search/--add/--list/--stats flags.
        epilog="""
EXAMPLES:
  # Search for an artist in the lookup table
  python3 -m src.cli.artist_lookup_cli search "Destiny's Child"

  # Add a new artist variation
  python3 -m src.cli.artist_lookup_cli add --canonical-name "New Artist" --mbid "12345678-1234-1234-1234-123456789abc" --variations "Artist, The Artist, Artist Band"

  # List all artists in the lookup table
  python3 -m src.cli.artist_lookup_cli list

  # Show statistics
  python3 -m src.cli.artist_lookup_cli stats
        """
    )

    # Create subparsers for different commands
    subparsers = parser.add_subparsers(dest='command', help='Available commands')

    # Search command
    search_parser = subparsers.add_parser('search', help='Search for artists in lookup table')
    search_parser.add_argument('query', help='Artist name to search for')
    search_parser.add_argument('--min-score', type=float, default=0.6,
                               help='Minimum fuzzy match score (0.0 to 1.0, default: 0.6)')

    # Add command
    add_parser = subparsers.add_parser('add', help='Add new artist variation to lookup table')
    add_parser.add_argument('--canonical-name', required=True, help='Canonical artist name')
    add_parser.add_argument('--mbid', required=True, help='MusicBrainz ID')
    add_parser.add_argument('--variations', required=True,
                            help='Comma-separated list of name variations')
    add_parser.add_argument('--notes', default='', help='Optional notes about the artist')

    # List and stats commands take no arguments
    subparsers.add_parser('list', help='List all artists in lookup table')
    subparsers.add_parser('stats', help='Show lookup table statistics')

    # Parse arguments
    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        return

    # Execute commands
    if args.command == 'search':
        search_artists(args.query, args.min_score)
    elif args.command == 'add':
        add_artist(args.canonical_name, args.mbid, args.variations, args.notes)
    elif args.command == 'list':
        list_artists()
    elif args.command == 'stats':
        show_statistics()
|
||||
|
||||
|
||||
def search_artists(query: str, min_score: float):
    """
    Look up `query` in the artist lookup table and print every match.

    Args:
        query: Artist name to search for.
        min_score: Minimum score passed through to artist_lookup.search_artists.

    Prints a header, then either a "no matches" line or one numbered entry per
    match (MBID, score, match type, plus matched variation and notes when the
    match carries them).
    """
    print(f"🔍 Searching for '{query}' in artist lookup table...")
    print(f"📊 Minimum score: {min_score}")
    print("-" * 60)

    matches = artist_lookup.search_artists(query, min_score)
    if not matches:
        print("❌ No matches found")
        return

    print(f"✅ Found {len(matches)} matches:")
    print()

    # Optional fields are only shown when present and non-empty.
    optional_fields = (
        ('matched_variation', 'Matched Variation'),
        ('notes', 'Notes'),
    )

    for position, match in enumerate(matches, start=1):
        print(f"{position}. {match['canonical_name']}")
        print(f" MBID: {match['mbid']}")
        print(f" Score: {match['score']:.2f}")
        print(f" Match Type: {match['match_type']}")
        for key, label in optional_fields:
            if match.get(key):
                print(f" {label}: {match[key]}")
        print()
|
||||
|
||||
|
||||
def add_artist(canonical_name: str, mbid: str, variations_str: str, notes: str):
    """
    Register an artist and its name variations in the lookup table.

    Args:
        canonical_name: Canonical artist name.
        mbid: MusicBrainz ID of the artist.
        variations_str: Comma-separated name variations; blank entries are ignored.
        notes: Free-form notes stored with the entry.

    Echoes what is being added, calls add_artist_fallback, and prints either the
    updated table statistics or a failure message depending on its result.
    """
    print(f"➕ Adding artist variation to lookup table...")
    print(f"📝 Canonical Name: {canonical_name}")
    print(f"🆔 MBID: {mbid}")
    print(f"📋 Notes: {notes}")

    # Split the comma-separated list, trimming whitespace and dropping blanks.
    trimmed = (chunk.strip() for chunk in variations_str.split(','))
    variations = [name for name in trimmed if name]
    print(f"🔄 Variations: {', '.join(variations)}")
    print()

    if not add_artist_fallback(canonical_name, mbid, variations, notes):
        print("❌ Failed to add artist variation")
        return

    print("✅ Artist variation added successfully!")

    # Report the table size after the insert.
    stats = artist_lookup.get_statistics()
    print(f"📊 Updated statistics: {stats['total_artists']} artists, {stats['total_variations']} variations")
|
||||
|
||||
|
||||
def list_artists():
    """
    Print a numbered, sorted listing of every artist returned by
    artist_lookup.list_artists(), or a notice when the table is empty.
    """
    names = artist_lookup.list_artists()

    if names:
        print(f"📚 Found {len(names)} artists in lookup table:")
        print("-" * 60)

        position = 0
        for name in sorted(names):
            position += 1
            print(f"{position}. {name}")
    else:
        print("📚 Artist lookup table is empty")
|
||||
|
||||
|
||||
def show_statistics():
    """
    Print summary statistics of the artist lookup table.

    Always prints the artist and variation totals from
    artist_lookup.get_statistics(); when the statistics include a non-empty
    'metadata' mapping, its version, last-updated and description values are
    printed too, with placeholders for missing keys.
    """
    stats = artist_lookup.get_statistics()

    print("📊 Artist Lookup Table Statistics")
    print("=" * 40)
    print(f"Total Artists: {stats['total_artists']}")
    print(f"Total Variations: {stats['total_variations']}")

    metadata = stats.get('metadata')
    if not metadata:
        return

    # (label, metadata key, fallback shown when the key is missing)
    metadata_rows = (
        ('Version', 'version', 'Unknown'),
        ('Last Updated', 'last_updated', 'Unknown'),
        ('Description', 'description', 'No description'),
    )
    for label, key, fallback in metadata_rows:
        print(f"{label}: {metadata.get(key, fallback)}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
860
src/cli/main.py
860
src/cli/main.py
@ -7,9 +7,9 @@ Now uses direct database access with fuzzy search for better performance.
|
||||
import sys
|
||||
import json
|
||||
import time
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Dict, Optional, Any, Tuple, List
|
||||
from datetime import datetime
|
||||
|
||||
# Import constants
|
||||
from ..config.constants import (
|
||||
@ -21,416 +21,245 @@ from ..config.constants import (
|
||||
ExitCode
|
||||
)
|
||||
|
||||
# Import data loader
|
||||
from ..utils.data_loader import data_loader
|
||||
|
||||
# Import database and API clients
|
||||
from ..api.database import MusicBrainzDatabase
|
||||
from ..api.api_client import MusicBrainzAPIClient
|
||||
|
||||
# Import core components
|
||||
from ..core.song_processor import SongProcessor
|
||||
from ..core.factory import DataProviderFactory
|
||||
|
||||
|
||||
class MusicBrainzCleaner:
|
||||
"""Enhanced MusicBrainz Cleaner with direct database access and fuzzy search."""
|
||||
"""Enhanced MusicBrainz Cleaner with interface-based data access."""
|
||||
|
||||
def __init__(self, use_database: bool = True, base_url: str = DEFAULT_MUSICBRAINZ_URL):
|
||||
def __init__(self, use_database: bool = True, base_url: str = DEFAULT_MUSICBRAINZ_URL, force: bool = False, no_sort: bool = False):
|
||||
self.use_database = use_database
|
||||
self.base_url = base_url
|
||||
self.force = force
|
||||
self.no_sort = no_sort
|
||||
|
||||
# Initialize database connection (primary method)
|
||||
if use_database:
|
||||
self.db = MusicBrainzDatabase()
|
||||
if not self.db.connect():
|
||||
print("⚠️ Database connection failed, falling back to API")
|
||||
self.use_database = False
|
||||
# Create data provider using factory
|
||||
self.data_provider = DataProviderFactory.create_provider(use_database, base_url)
|
||||
|
||||
# Initialize API client (fallback method)
|
||||
if not self.use_database:
|
||||
self.api = MusicBrainzAPIClient(base_url)
|
||||
# Initialize centralized song processor with the data provider
|
||||
self.song_processor = SongProcessor(self.data_provider)
|
||||
|
||||
def _clean_artist_name(self, artist_name: str) -> str:
|
||||
name = artist_name.strip()
|
||||
|
||||
# Load name variations from data file
|
||||
variations = data_loader.load_name_variations()
|
||||
|
||||
for old, new in variations.items():
|
||||
name = re.sub(rf'\b{re.escape(old)}\b', new, name, flags=re.IGNORECASE)
|
||||
|
||||
return name
|
||||
|
||||
def _clean_title(self, title: str) -> str:
|
||||
def clean_song(self, song: Dict[str, Any]) -> Tuple[Dict[str, Any], bool]:
|
||||
"""
|
||||
Clean title by removing karaoke patterns.
|
||||
Note: The database search now tries multiple title variations including
|
||||
the full title and versions with parenthetical content removed.
|
||||
Clean a single song using the centralized song processor.
|
||||
Returns (cleaned_song, success_status)
|
||||
"""
|
||||
title = title.strip()
|
||||
karaoke_patterns = [
|
||||
r'\s*\(Karaoke Version\)',
|
||||
r'\s*\(Karaoke\)',
|
||||
r'\s*\(Instrumental\)',
|
||||
r'\s*\(Backing Track\)',
|
||||
]
|
||||
for pattern in karaoke_patterns:
|
||||
title = re.sub(pattern, '', title, flags=re.IGNORECASE)
|
||||
return title.strip()
|
||||
|
||||
def find_artist_mbid(self, artist_name: str) -> Optional[str]:
    """Look up the MusicBrainz ID of a single (non-collaboration) artist.

    The name is normalized with ``_clean_artist_name`` and then checked with
    ``_parse_complex_collaboration``. Collaborations are deliberately not
    resolved here: ``None`` is returned so the caller can resolve the full
    artist credit together with the recording.

    Args:
        artist_name: Raw artist string from the song record.

    Returns:
        The artist MBID, or ``None`` when the string is a collaboration,
        nothing matched, or the API lookup failed.
    """
    clean_name = self._clean_artist_name(artist_name)

    # Handle collaborations - prioritize finding artist credit
    _main_artist, collaborators = self._parse_complex_collaboration(clean_name)
    if collaborators:
        # Resolved later through the artist-credit approach
        return None

    if self.use_database:
        result = self.db.fuzzy_search_artist(clean_name)
        if result and isinstance(result, tuple) and len(result) >= 2:
            return result[1]  # MBID from tuple (artist_name, mbid, score)
        return None

    # Fallback to API. The lookup stays best-effort, but only ordinary errors
    # are absorbed (not KeyboardInterrupt/SystemExit) and they are reported.
    try:
        result = self.api.search_artist(clean_name)
        if result:
            return result['id']
    except Exception as exc:
        print(f"⚠️ Artist lookup failed for '{clean_name}': {exc}")

    return None
|
||||
|
||||
def find_recording_mbid(self, artist_mbid: str, title: str) -> Optional[str]:
    """Find the MusicBrainz recording ID for a title.

    Lookup order:
      1. the known-recordings table from ``data_loader.load_known_recordings()``
         (keys are ``(artist_mbid, title)`` pairs, titles compared
         case-insensitively);
      2. the database fuzzy search, by title only when ``artist_mbid`` is
         falsy (collaboration case), otherwise by title and artist;
      3. the API search when the database is not in use.

    Args:
        artist_mbid: MBID of the artist, or a falsy value for collaborations.
        title: Raw song title; cleaned with ``_clean_title`` before searching.

    Returns:
        The recording MBID, or ``None`` when nothing matched or the API
        lookup failed.
    """
    clean_title = self._clean_title(title)
    wanted_title = clean_title.lower()  # hoisted: invariant across the loop

    # Check known recordings first
    known_recordings = data_loader.load_known_recordings()
    for (known_artist_mbid, known_title), recording_mbid in known_recordings.items():
        if artist_mbid == known_artist_mbid and wanted_title == known_title.lower():
            return recording_mbid

    if self.use_database:
        if not artist_mbid:
            # Collaboration case: no artist MBID, search by title only
            result = self.db.fuzzy_search_recording(clean_title)
        else:
            # Regular case with artist_mbid
            result = self.db.fuzzy_search_recording(clean_title, artist_mbid)
        if result and isinstance(result, tuple) and len(result) >= 2:
            return result[1]  # MBID from tuple (recording_name, mbid, score)
        return None

    # Fallback to API. The lookup stays best-effort, but only ordinary errors
    # are absorbed (not KeyboardInterrupt/SystemExit) and they are reported.
    try:
        result = self.api.search_recording(clean_title, artist_mbid)
        if result:
            return result['id']
    except Exception as exc:
        print(f"⚠️ Recording lookup failed for '{clean_title}': {exc}")

    return None
|
||||
|
||||
def get_artist_info(self, mbid: str) -> Optional[Dict[str, Any]]:
    """Fetch artist details for ``mbid`` from the active backend.

    The database object is used when ``self.use_database`` is true, otherwise
    the API client; the backend's result is returned unchanged.
    """
    backend = self.db if self.use_database else self.api
    return backend.get_artist_info(mbid)
|
||||
|
||||
def get_recording_info(self, mbid: str) -> Optional[Dict[str, Any]]:
    """Fetch recording details for ``mbid`` from the active backend.

    The database object is used when ``self.use_database`` is true, otherwise
    the API client; the backend's result is returned unchanged.
    """
    backend = self.db if self.use_database else self.api
    return backend.get_recording_info(mbid)
|
||||
|
||||
def _build_artist_string(self, artist_credit: list) -> str:
|
||||
if not artist_credit:
|
||||
return ""
|
||||
parts = []
|
||||
for credit in artist_credit:
|
||||
parts.append(credit['name'])
|
||||
if credit.get('joinphrase'):
|
||||
parts.append(credit['joinphrase'])
|
||||
return ''.join(parts).strip()
|
||||
|
||||
def _parse_complex_collaboration(self, artist_string: str) -> Tuple[str, List[str]]:
|
||||
"""
|
||||
Parse complex collaboration strings like "Pitbull ft. Ne-Yo, Afrojack & Nayer"
|
||||
Returns: (main_artist, [collaborators])
|
||||
"""
|
||||
import re
|
||||
|
||||
# Primary collaboration indicators
|
||||
primary_patterns = ['ft.', 'feat.', 'featuring']
|
||||
|
||||
# Secondary collaboration indicators (need more careful handling)
|
||||
secondary_patterns = ['&', 'and']
|
||||
|
||||
# Check if this is a collaboration
|
||||
is_collaboration = False
|
||||
split_pattern = None
|
||||
|
||||
for pattern in primary_patterns:
|
||||
if pattern.lower() in artist_string.lower():
|
||||
is_collaboration = True
|
||||
# Find the actual pattern in the original string (case-insensitive)
|
||||
import re
|
||||
match = re.search(re.escape(pattern), artist_string, re.IGNORECASE)
|
||||
if match:
|
||||
split_pattern = match.group(0)
|
||||
else:
|
||||
split_pattern = pattern
|
||||
break
|
||||
|
||||
# If no primary collaboration found, check secondary patterns
|
||||
if not is_collaboration:
|
||||
for pattern in secondary_patterns:
|
||||
# Use word boundaries to avoid splitting within words like "Orlando"
|
||||
pattern_regex = r'\b' + re.escape(pattern) + r'\b'
|
||||
if re.search(pattern_regex, artist_string, re.IGNORECASE):
|
||||
# For secondary patterns, be more careful
|
||||
# Check if this looks like a band name vs collaboration
|
||||
parts = re.split(pattern_regex, artist_string, flags=re.IGNORECASE)
|
||||
if len(parts) == 2:
|
||||
part1 = parts[0].strip()
|
||||
part2 = parts[1].strip()
|
||||
|
||||
# Check if this looks like a band name vs collaboration
|
||||
# Band names typically have simple, short parts
|
||||
# Collaborations often have more complex artist names
|
||||
part1_words = len(part1.split())
|
||||
part2_words = len(part2.split())
|
||||
|
||||
# Check if this looks like a band name vs collaboration
|
||||
# Band names typically have simple, short parts
|
||||
# Collaborations often have more complex artist names
|
||||
|
||||
# If one part has a comma, it's likely a collaboration
|
||||
if ',' in part1 or ',' in part2:
|
||||
is_collaboration = True
|
||||
split_pattern = pattern
|
||||
break
|
||||
|
||||
# For "&" separators, be more permissive
|
||||
# Most "&" separators in music are collaborations, not band names
|
||||
# Only treat very obvious band names as non-collaborations
|
||||
if part1_words == 1 and part2_words == 1:
|
||||
# Check for very obvious band name patterns
|
||||
obvious_band_names = ['simon & garfunkel', 'hall & oates', 'brooks & dunn']
|
||||
if artist_string.lower() in obvious_band_names:
|
||||
# Likely a band name, skip
|
||||
continue
|
||||
|
||||
# Likely a collaboration
|
||||
is_collaboration = True
|
||||
split_pattern = pattern
|
||||
break
|
||||
|
||||
if not is_collaboration:
|
||||
return (artist_string, [])
|
||||
|
||||
# Split on the pattern
|
||||
parts = artist_string.split(split_pattern)
|
||||
if len(parts) < 2:
|
||||
return (artist_string, [])
|
||||
|
||||
main_artist = parts[0].strip()
|
||||
collaborators_string = split_pattern.join(parts[1:]).strip()
|
||||
|
||||
# Now parse the collaborators string which might have multiple separators
|
||||
collaborators = self._parse_collaborators(collaborators_string)
|
||||
|
||||
return (main_artist, collaborators)
|
||||
|
||||
def _parse_collaborators(self, collaborators_string: str) -> List[str]:
|
||||
"""
|
||||
Parse a string like "Ne-Yo, Afrojack & Nayer" into individual artists
|
||||
"""
|
||||
import re
|
||||
|
||||
if not collaborators_string:
|
||||
return []
|
||||
|
||||
# Split on common separators: comma, ampersand, "and"
|
||||
# Use regex to handle multiple separators
|
||||
# Use word boundaries to avoid splitting within words like "Orlando"
|
||||
separators = r'[,&]|\b(?:and)\b'
|
||||
|
||||
# Split and clean up each part
|
||||
parts = re.split(separators, collaborators_string, flags=re.IGNORECASE)
|
||||
|
||||
collaborators = []
|
||||
for part in parts:
|
||||
part = part.strip()
|
||||
if part: # Skip empty parts
|
||||
collaborators.append(part)
|
||||
|
||||
return collaborators
|
||||
|
||||
def clean_song(self, song: Dict[str, Any]) -> Dict[str, Any]:
# Clean one song record using MusicBrainz lookups, printing progress as it goes.
# Two routes are visible below:
#   * collaboration route - taken when find_artist_mbid() returned None and
#     _parse_complex_collaboration() reported at least one collaborator;
#   * regular route - artist lookup, then recording lookup for that artist.
# The dict passed in is updated in place ('artist', 'title', 'mbid',
# 'recording_mbid' keys) and every visible return statement returns that dict.
# NOTE(review): indentation was lost in this rendering, so the exact nesting of
# some statements (notably in the API fallback branch) cannot be confirmed here.
|
||||
print(f"Processing: {song.get('artist', 'Unknown')} - {song.get('title', 'Unknown')}")
|
||||
|
||||
# Find artist MBID
|
||||
artist_mbid = self.find_artist_mbid(song.get('artist', ''))
|
||||
|
||||
# Handle collaboration case (artist_mbid is None)
|
||||
# Use the same complex collaboration parsing as the database
|
||||
# Note: the raw artist field is parsed here, not a cleaned copy of it.
artist_name = song.get('artist', '')
|
||||
main_artist, collaborators = self._parse_complex_collaboration(artist_name)
|
||||
has_collaboration = len(collaborators) > 0
|
||||
|
||||
if artist_mbid is None and has_collaboration:
|
||||
print(f" 🎯 Collaboration detected: {song.get('artist')}")
|
||||
# Try to find recording using artist credit approach
|
||||
if self.use_database:
|
||||
# A truthy result is unpacked below as (artist_credit_id, artist_string, recording_mbid).
result = self.db.find_artist_credit(song.get('artist', ''), song.get('title', ''))
|
||||
if result:
|
||||
artist_credit_id, artist_string, recording_mbid = result
|
||||
print(f" ✅ Found recording: {song.get('title')} (MBID: {recording_mbid})")
|
||||
|
||||
# Update with the correct artist credit
|
||||
song['artist'] = artist_string
|
||||
song['recording_mbid'] = recording_mbid
|
||||
|
||||
# For collaborations, try to get the main artist's MBID
|
||||
# The artist string is re-parsed because song['artist'] was just replaced above.
if self.use_database:
|
||||
main_artist, collaborators = self._parse_complex_collaboration(song.get('artist', ''))
|
||||
if main_artist:
|
||||
artist_result = self.db.fuzzy_search_artist(main_artist)
|
||||
if artist_result and isinstance(artist_result, tuple) and len(artist_result) >= 2:
|
||||
song['mbid'] = artist_result[1] # Set the main artist's MBID
|
||||
|
||||
print(f" ✅ Updated to: {song['artist']} - {song.get('title')}")
|
||||
return song
|
||||
else:
|
||||
print(f" ❌ Could not find recording: {song.get('title')}")
|
||||
return song
|
||||
else:
|
||||
# Fallback to API method
|
||||
# No artist MBID is available for a collaboration, so None is passed.
recording_mbid = self.find_recording_mbid(None, song.get('title', ''))
|
||||
if recording_mbid:
|
||||
recording_info = self.get_recording_info(recording_mbid)
|
||||
if recording_info and recording_info.get('artist-credit'):
|
||||
artist_string = self._build_artist_string(recording_info['artist-credit'])
|
||||
if artist_string:
|
||||
song['artist'] = artist_string
|
||||
print(f" ✅ Updated to: {song['artist']} - {recording_info['title']}")
|
||||
song['title'] = recording_info['title']
|
||||
song['recording_mbid'] = recording_mbid
|
||||
return song
|
||||
else:
|
||||
print(f" ❌ Could not find recording: {song.get('title')}")
|
||||
return song
|
||||
|
||||
# Regular case (non-collaboration or collaboration not found)
|
||||
if not artist_mbid:
|
||||
print(f" ❌ Could not find artist: {song.get('artist')}")
|
||||
return song
|
||||
|
||||
# Get artist info
|
||||
artist_info = self.get_artist_info(artist_mbid)
|
||||
if artist_info:
|
||||
print(f" ✅ Found artist: {artist_info['name']} (MBID: {artist_mbid})")
|
||||
song['artist'] = artist_info['name']
|
||||
song['mbid'] = artist_mbid
|
||||
|
||||
# Find recording MBID
|
||||
recording_mbid = self.find_recording_mbid(artist_mbid, song.get('title', ''))
|
||||
if not recording_mbid:
|
||||
print(f" ❌ Could not find recording: {song.get('title')}")
|
||||
return song
|
||||
|
||||
# Get recording info
|
||||
recording_info = self.get_recording_info(recording_mbid)
|
||||
if recording_info:
|
||||
print(f" ✅ Found recording: {recording_info['title']} (MBID: {recording_mbid})")
|
||||
|
||||
# Update artist string if there are multiple artists, but preserve the artist MBID
|
||||
# The two backends use different keys: 'artist_credit' is assigned as-is in
# database mode, 'artist-credit' is passed through _build_artist_string in API mode.
if self.use_database and recording_info.get('artist_credit'):
|
||||
song['artist'] = recording_info['artist_credit']
|
||||
# Keep the original artist MBID even when updating artist name
|
||||
if 'mbid' not in song:
|
||||
song['mbid'] = artist_mbid
|
||||
elif not self.use_database and recording_info.get('artist-credit'):
|
||||
artist_string = self._build_artist_string(recording_info['artist-credit'])
|
||||
if artist_string:
|
||||
song['artist'] = artist_string
|
||||
# Keep the original artist MBID even when updating artist name
|
||||
if 'mbid' not in song:
|
||||
song['mbid'] = artist_mbid
|
||||
|
||||
song['title'] = recording_info['title']
|
||||
song['recording_mbid'] = recording_mbid
|
||||
|
||||
print(f" ✅ Updated to: {song['artist']} - {song['title']}")
|
||||
return song
|
||||
|
||||
def clean_songs_file(self, input_file: Path, output_file: Optional[Path] = None, limit: Optional[int] = None) -> Path:
|
||||
try:
|
||||
# Read input file
|
||||
with open(input_file, 'r', encoding='utf-8') as f:
|
||||
songs = json.load(f)
|
||||
|
||||
if not isinstance(songs, list):
|
||||
print("Error: Input file should contain a JSON array of songs")
|
||||
return input_file
|
||||
|
||||
# Apply limit if specified
|
||||
if limit is not None:
|
||||
songs = songs[:limit]
|
||||
print(f"⚠️ Limiting processing to first {limit} songs")
|
||||
|
||||
# Determine output path
|
||||
if output_file is None:
|
||||
output_file = input_file.parent / f"{input_file.stem}_cleaned.json"
|
||||
|
||||
print(f"Processing {len(songs)} songs...")
|
||||
print(f"Using {'database' if self.use_database else 'API'} connection")
|
||||
print(PROGRESS_SEPARATOR)
|
||||
|
||||
# Clean each song
|
||||
cleaned_songs = []
|
||||
for i, song in enumerate(songs, 1):
|
||||
print(f"\n[{i}/{len(songs)}]", end=" ")
|
||||
cleaned_song = self.clean_song(song)
|
||||
cleaned_songs.append(cleaned_song)
|
||||
# If force flag is set, remove ALL existing MBIDs to force complete reprocessing
|
||||
if self.force:
|
||||
song_copy.pop('mbid', None)
|
||||
song_copy.pop('recording_mbid', None)
|
||||
|
||||
# Only add delay for API calls, not database queries
|
||||
if not self.use_database:
|
||||
time.sleep(API_REQUEST_DELAY)
|
||||
return self.song_processor.clean_song(song_copy)
|
||||
|
||||
def process_songs(self, source_file: Path, output_success: Path = None, output_failure: Path = None, limit: Optional[int] = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Process songs from source file and save successful and failed songs to separate files.
|
||||
This is the main processing method that handles full dataset processing by default.
|
||||
"""
|
||||
if not source_file.exists():
|
||||
print(f'❌ Source file not found: {source_file}')
|
||||
return {}
|
||||
|
||||
print('🚀 Starting song processing...')
|
||||
|
||||
# Load songs
|
||||
with open(source_file, 'r') as f:
|
||||
all_songs = json.load(f)
|
||||
|
||||
if not isinstance(all_songs, list):
|
||||
print("Error: Source file should contain a JSON array of songs")
|
||||
return {}
|
||||
|
||||
# Sort songs by artist for better performance and organization
|
||||
original_count = len(all_songs)
|
||||
if not self.no_sort:
|
||||
print("🔄 Sorting songs by artist for optimal processing...")
|
||||
all_songs.sort(key=lambda song: song.get('artist', '').lower())
|
||||
print(f"✅ Songs sorted by artist")
|
||||
else:
|
||||
print("ℹ️ Preserving original song order (sorting disabled)")
|
||||
|
||||
# Apply limit if specified (after sorting)
|
||||
if limit is not None:
|
||||
all_songs = all_songs[:limit]
|
||||
print(f"⚠️ Limiting processing to first {limit} songs (from {original_count:,} total)")
|
||||
|
||||
total_songs = len(all_songs)
|
||||
print(f'📊 Total songs to process: {total_songs:,}')
|
||||
|
||||
# Check if file is empty
|
||||
if total_songs == 0:
|
||||
print('⚠️ No songs to process - file is empty')
|
||||
return {}
|
||||
|
||||
# Write output file
|
||||
with open(output_file, 'w', encoding='utf-8') as f:
|
||||
json.dump(cleaned_songs, f, indent=2, ensure_ascii=False)
|
||||
print(f'Using {"database" if self.use_database else "API"} connection')
|
||||
if self.force:
|
||||
print('🔄 Force mode enabled - ignoring all existing MBIDs')
|
||||
print(PROGRESS_SEPARATOR)
|
||||
|
||||
# Initialize arrays for batch processing
|
||||
successful_songs = []
|
||||
failed_songs = []
|
||||
|
||||
# Statistics tracking
|
||||
stats = {
|
||||
'total_processed': 0,
|
||||
'artists_found': 0,
|
||||
'recordings_found': 0,
|
||||
'start_time': time.time()
|
||||
}
|
||||
|
||||
# Process each song
|
||||
for i, song in enumerate(all_songs, 1):
|
||||
try:
|
||||
result = self.clean_song(song)
|
||||
cleaned_song, success = result
|
||||
|
||||
artist_found = 'mbid' in cleaned_song
|
||||
recording_found = 'recording_mbid' in cleaned_song
|
||||
|
||||
# Display progress with counter and status
|
||||
artist_name = song.get('artist', 'Unknown')
|
||||
title = song.get('title', 'Unknown')
|
||||
|
||||
if artist_found and recording_found:
|
||||
stats['artists_found'] += 1
|
||||
stats['recordings_found'] += 1
|
||||
successful_songs.append(cleaned_song)
|
||||
print(f'[{i:,} of {total_songs:,}] ✅ PASS: {artist_name} - {title}')
|
||||
else:
|
||||
# Keep the original song in failed_songs array (same format as source)
|
||||
failed_songs.append(song)
|
||||
print(f'[{i:,} of {total_songs:,}] ❌ FAIL: {artist_name} - {title}')
|
||||
|
||||
stats['total_processed'] += 1
|
||||
|
||||
# Progress update every 100 songs
|
||||
if i % 100 == 0:
|
||||
elapsed = time.time() - stats['start_time']
|
||||
rate = i / elapsed if elapsed > 0 else 0
|
||||
success_rate = (stats['artists_found'] / i * 100) if i > 0 else 0
|
||||
print(f' 📈 Progress: {i:,}/{total_songs:,} ({i/total_songs*100:.1f}%) - '
|
||||
f'Success: {success_rate:.1f}% - Rate: {rate:.1f} songs/sec')
|
||||
|
||||
except Exception as e:
|
||||
print(f' ❌ Error processing song {i}: {e}')
|
||||
# Keep the original song in failed_songs array
|
||||
failed_songs.append(song)
|
||||
stats['total_processed'] += 1
|
||||
|
||||
print(f"\n{PROGRESS_SEPARATOR}")
|
||||
print(SUCCESS_MESSAGES['processing_complete'])
|
||||
print(SUCCESS_MESSAGES['output_saved'].format(file_path=output_file))
|
||||
# Only add delay for API calls, not database queries
|
||||
if not self.use_database:
|
||||
time.sleep(API_REQUEST_DELAY)
|
||||
|
||||
# Determine output file paths
|
||||
if output_success is None:
|
||||
output_success = source_file.parent / f"{source_file.stem}-success.json"
|
||||
if output_failure is None:
|
||||
output_failure = source_file.parent / f"{source_file.stem}-failure.json"
|
||||
|
||||
# Save successful songs (array format, same as source)
|
||||
with open(output_success, 'w', encoding='utf-8') as f:
|
||||
json.dump(successful_songs, f, indent=2, ensure_ascii=False)
|
||||
|
||||
# Save failed songs (array format, same as source)
|
||||
with open(output_failure, 'w', encoding='utf-8') as f:
|
||||
json.dump(failed_songs, f, indent=2, ensure_ascii=False)
|
||||
|
||||
# Calculate final statistics
|
||||
total_time = time.time() - stats['start_time']
|
||||
|
||||
# Create human-readable text report
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
report_file = source_file.parent / f"processing_report_{timestamp}.txt"
|
||||
|
||||
with open(report_file, 'w', encoding='utf-8') as f:
|
||||
f.write("MusicBrainz Data Cleaner - Processing Report\n")
|
||||
f.write("=" * 50 + "\n\n")
|
||||
f.write(f"Source File: {source_file}\n")
|
||||
f.write(f"Processing Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
|
||||
f.write(f"Processing Time: {total_time:.1f} seconds\n\n")
|
||||
|
||||
return output_file
|
||||
f.write("SUMMARY\n")
|
||||
f.write("-" * 20 + "\n")
|
||||
f.write(f"Total Songs Processed: {stats['total_processed']:,}\n")
|
||||
f.write(f"Successful Songs: {len(successful_songs):,}\n")
|
||||
f.write(f"Failed Songs: {len(failed_songs):,}\n")
|
||||
f.write(f"Success Rate: {(len(successful_songs)/stats['total_processed']*100):.1f}%" if stats['total_processed'] > 0 else "Success Rate: 0.0%")
|
||||
f.write("\n\n")
|
||||
|
||||
except FileNotFoundError:
|
||||
print(f"Error: File '{input_file}' not found")
|
||||
return input_file
|
||||
except json.JSONDecodeError:
|
||||
print(f"Error: Invalid JSON in file '{input_file}'")
|
||||
return input_file
|
||||
except Exception as e:
|
||||
print(f"Error processing file: {e}")
|
||||
return input_file
|
||||
finally:
|
||||
# Clean up database connection
|
||||
if self.use_database and hasattr(self, 'db'):
|
||||
self.db.disconnect()
|
||||
f.write("DETAILED STATISTICS\n")
|
||||
f.write("-" * 20 + "\n")
|
||||
f.write(f"Artists Found: {stats['artists_found']:,}/{stats['total_processed']:,} ({(stats['artists_found']/stats['total_processed']*100):.1f}%" if stats['total_processed'] > 0 else f"Artists Found: {stats['artists_found']:,}/{stats['total_processed']:,} (0.0%")
|
||||
f.write(")\n")
|
||||
f.write(f"Recordings Found: {stats['recordings_found']:,}/{stats['total_processed']:,} ({(stats['recordings_found']/stats['total_processed']*100):.1f}%" if stats['total_processed'] > 0 else f"Recordings Found: {stats['recordings_found']:,}/{stats['total_processed']:,} (0.0%")
|
||||
f.write(")\n")
|
||||
f.write(f"Processing Speed: {stats['total_processed'] / total_time:.1f} songs/second" if total_time > 0 else "Processing Speed: 0.0 songs/second")
|
||||
f.write("\n\n")
|
||||
|
||||
f.write("OUTPUT FILES\n")
|
||||
f.write("-" * 20 + "\n")
|
||||
f.write(f"Successful Songs: {output_success}\n")
|
||||
f.write(f"Failed Songs: {output_failure}\n")
|
||||
f.write(f"Report File: {report_file}\n\n")
|
||||
|
||||
if len(failed_songs) > 0:
|
||||
f.write("FAILED SONGS (First 50)\n")
|
||||
f.write("-" * 20 + "\n")
|
||||
for i, song in enumerate(failed_songs[:50], 1):
|
||||
artist = song.get('artist', 'Unknown')
|
||||
title = song.get('title', 'Unknown')
|
||||
f.write(f"{i:3d}. {artist} - {title}\n")
|
||||
|
||||
if len(failed_songs) > 50:
|
||||
f.write(f"... and {len(failed_songs) - 50} more failed songs\n")
|
||||
|
||||
# Save detailed JSON report for programmatic access
|
||||
json_report_file = source_file.parent / f"processing_report_{timestamp}.json"
|
||||
final_stats = {
|
||||
'summary': {
|
||||
'total_tested': stats['total_processed'],
|
||||
'artists_found': stats['artists_found'],
|
||||
'recordings_found': stats['recordings_found'],
|
||||
'failed_count': len(failed_songs),
|
||||
'artist_success_rate': (stats['artists_found'] / stats['total_processed'] * 100) if stats['total_processed'] > 0 else 0,
|
||||
'recording_success_rate': (stats['recordings_found'] / stats['total_processed'] * 100) if stats['total_processed'] > 0 else 0,
|
||||
'processing_time_seconds': total_time,
|
||||
'songs_per_second': stats['total_processed'] / total_time if total_time > 0 else 0
|
||||
},
|
||||
'files': {
|
||||
'source': str(source_file),
|
||||
'successful_songs': str(output_success),
|
||||
'failed_songs': str(output_failure),
|
||||
'text_report': str(report_file),
|
||||
'json_report': str(json_report_file)
|
||||
}
|
||||
}
|
||||
|
||||
with open(json_report_file, 'w') as f:
|
||||
json.dump(final_stats, f, indent=2)
|
||||
|
||||
print(f'\n{PROGRESS_SEPARATOR}')
|
||||
print(f'🎉 Processing completed!')
|
||||
print(f'📊 Final Results:')
|
||||
print(f' ⏱️ Total processing time: {total_time:.1f} seconds')
|
||||
print(f' 🚀 Average speed: {stats["total_processed"] / total_time:.1f} songs/second' if total_time > 0 else ' 🚀 Average speed: 0.0 songs/second')
|
||||
print(f' ✅ Artists found: {stats["artists_found"]:,}/{stats["total_processed"]:,} ({stats["artists_found"]/stats["total_processed"]*100:.1f}%)' if stats["total_processed"] > 0 else f' ✅ Artists found: {stats["artists_found"]:,}/{stats["total_processed"]:,} (0.0%)')
|
||||
print(f' ✅ Recordings found: {stats["recordings_found"]:,}/{stats["total_processed"]:,} ({stats["recordings_found"]/stats["total_processed"]*100:.1f}%)' if stats["total_processed"] > 0 else f' ✅ Recordings found: {stats["recordings_found"]:,}/{stats["total_processed"]:,} (0.0%)')
|
||||
print(f' ❌ Failed songs: {len(failed_songs):,} ({len(failed_songs)/stats["total_processed"]*100:.1f}%)' if stats["total_processed"] > 0 else f' ❌ Failed songs: {len(failed_songs):,} (0.0%)')
|
||||
print(f'📄 Files saved:')
|
||||
print(f' ✅ Successful songs: {output_success}')
|
||||
print(f' ❌ Failed songs: {output_failure}')
|
||||
print(f' 📋 Text report: {report_file}')
|
||||
print(f' 📊 JSON report: {json_report_file}')
|
||||
|
||||
return final_stats
|
||||
|
||||
|
||||
def print_help() -> None:
|
||||
@ -438,25 +267,48 @@ def print_help() -> None:
|
||||
MusicBrainz Data Cleaner - Clean and normalize song data using MusicBrainz
|
||||
|
||||
USAGE:
|
||||
musicbrainz-cleaner <input_file.json> [output_file.json] [options]
|
||||
musicbrainz-cleaner [options]
|
||||
|
||||
ARGUMENTS:
|
||||
input_file.json JSON file containing array of song objects
|
||||
output_file.json Optional: Output file for cleaned data
|
||||
DEFAULT BEHAVIOR:
|
||||
Songs are automatically sorted by artist name for optimal performance.
|
||||
Use --no-sort to preserve the original order.
|
||||
|
||||
OPTIONS:
|
||||
--help, -h Show this help message
|
||||
--version, -v Show version information
|
||||
--test-connection Test connection to MusicBrainz server
|
||||
--limit N Process only the first N songs (for testing)
|
||||
--use-api Force use of HTTP API instead of direct database access
|
||||
--source FILE Source JSON file (default: data/songs.json)
|
||||
--output-success FILE Output file for successful songs (default: source-success.json)
|
||||
--output-failure FILE Output file for failed songs (default: source-failure.json)
|
||||
--limit N Process only the first N songs (default: all songs)
|
||||
--use-api Force use of HTTP API instead of direct database access
|
||||
--force Force reprocessing, ignoring all existing MBIDs
|
||||
--no-sort Disable artist sorting (preserve original order)
|
||||
--test-connection Test connection to MusicBrainz server
|
||||
--help, -h Show this help message
|
||||
--version, -v Show version information
|
||||
|
||||
EXAMPLES:
|
||||
musicbrainz-cleaner songs.json
|
||||
musicbrainz-cleaner songs.json cleaned_songs.json
|
||||
# Process all songs with default settings
|
||||
musicbrainz-cleaner
|
||||
|
||||
# Process specific file
|
||||
musicbrainz-cleaner --source data/my_songs.json
|
||||
|
||||
# Process with custom output files
|
||||
musicbrainz-cleaner --source data/songs.json --output-success cleaned.json --output-failure failed.json
|
||||
|
||||
# Process only first 1000 songs
|
||||
musicbrainz-cleaner --limit 1000
|
||||
|
||||
# Test connection
|
||||
musicbrainz-cleaner --test-connection
|
||||
musicbrainz-cleaner songs.json --limit 5
|
||||
musicbrainz-cleaner songs.json --use-api
|
||||
|
||||
# Force API mode
|
||||
musicbrainz-cleaner --use-api
|
||||
|
||||
# Force reprocessing (ignore all existing MBIDs)
|
||||
musicbrainz-cleaner --force
|
||||
|
||||
# Disable artist sorting (preserve original order)
|
||||
musicbrainz-cleaner --no-sort
|
||||
|
||||
REQUIREMENTS:
|
||||
- MusicBrainz server running on http://localhost:5001
|
||||
@ -473,12 +325,14 @@ PERFORMANCE:
|
||||
|
||||
def print_version() -> None:
|
||||
version_info = """
|
||||
MusicBrainz Data Cleaner v2.0.0
|
||||
MusicBrainz Data Cleaner v3.0.0
|
||||
|
||||
Enhanced with:
|
||||
- Direct PostgreSQL database access
|
||||
- Fuzzy search for better matching
|
||||
- Improved performance and accuracy
|
||||
- Separate output files for successful and failed songs
|
||||
- Detailed progress tracking and reporting
|
||||
|
||||
Copyright (c) 2024 MusicBrainz Data Cleaner Contributors
|
||||
MIT License - see LICENSE file for details
|
||||
@ -488,120 +342,128 @@ Built with Python 3.6+
|
||||
print(version_info)
|
||||
|
||||
|
||||
def parse_arguments(args: List[str]) -> Dict[str, Any]:
    """Parse command line arguments into a dictionary.

    Boolean flags set their key to ``True``. Options that take a value accept
    both ``--option VALUE`` and ``--option=VALUE``; the second spelling used
    to be silently ignored, so ``--limit=10`` processed the whole dataset.
    Unrecognized arguments are ignored.

    Args:
        args: Argument list without the program name (``sys.argv[1:]``).

    Returns:
        Dict with keys ``source``, ``output_success``, ``output_failure``,
        ``limit``, ``use_api``, ``test_connection``, ``force``, ``no_sort``,
        ``help`` and ``version``.

    Raises:
        SystemExit: with ``ExitCode.USAGE_ERROR`` when a value option has no
            value, or ``--limit`` is not a positive integer.
    """
    parsed = {
        'source': 'data/songs.json',
        'output_success': None,
        'output_failure': None,
        'limit': None,
        'use_api': False,
        'test_connection': False,
        'force': False,
        'no_sort': False,
        'help': False,
        'version': False
    }

    # Arguments that take no value -> key switched on in ``parsed``
    flag_keys = {
        '--help': 'help', '-h': 'help', 'help': 'help',
        '--version': 'version', '-v': 'version', 'version': 'version',
        '--test-connection': 'test_connection',
        '--use-api': 'use_api',
        '--force': 'force',
        '--no-sort': 'no_sort',
    }

    # Options that take a value -> (key in ``parsed``, missing-value message)
    value_options = {
        '--source': ('source', "Error: --source requires a file path"),
        '--output-success': ('output_success', "Error: --output-success requires a file path"),
        '--output-failure': ('output_failure', "Error: --output-failure requires a file path"),
        '--limit': ('limit', "Error: --limit requires a number"),
    }

    i = 0
    while i < len(args):
        arg = args[i]
        option_name, has_inline_value, inline_value = arg.partition('=')

        if arg in flag_keys:
            parsed[flag_keys[arg]] = True
        elif option_name in value_options:
            key, missing_message = value_options[option_name]

            if has_inline_value:
                value = inline_value
            elif i + 1 < len(args) and not args[i + 1].startswith('--'):
                value = args[i + 1]
                i += 1  # consume the value as well
            else:
                value = ''

            if not value:
                print(missing_message)
                sys.exit(ExitCode.USAGE_ERROR)

            if key == 'limit':
                try:
                    value = int(value)
                except ValueError:
                    print("Error: --limit requires a valid number")
                    sys.exit(ExitCode.USAGE_ERROR)
                if value <= 0:
                    print("Error: --limit must be a positive number")
                    sys.exit(ExitCode.USAGE_ERROR)

            parsed[key] = value

        i += 1

    return parsed
|
||||
|
||||
|
||||
def main() -> int:
|
||||
try:
|
||||
args = sys.argv[1:]
|
||||
parsed = parse_arguments(args)
|
||||
|
||||
# Handle help and version flags
|
||||
if not args or args[0] in ['--help', '-h', 'help']:
|
||||
if parsed['help']:
|
||||
print_help()
|
||||
return ExitCode.SUCCESS
|
||||
|
||||
if args[0] in ['--version', '-v', 'version']:
|
||||
if parsed['version']:
|
||||
print_version()
|
||||
return ExitCode.SUCCESS
|
||||
|
||||
# Check for API flag
|
||||
use_database = '--use-api' not in args
|
||||
if not use_database:
|
||||
print("⚠️ Using HTTP API mode (slower than database access)")
|
||||
|
||||
# Handle test connection
|
||||
if args[0] == '--test-connection':
|
||||
if use_database:
|
||||
db = MusicBrainzDatabase()
|
||||
if db.test_connection():
|
||||
print("✅ Connection to MusicBrainz database successful")
|
||||
return ExitCode.SUCCESS
|
||||
else:
|
||||
print("❌ Connection to MusicBrainz database failed")
|
||||
return ExitCode.ERROR
|
||||
if parsed['test_connection']:
|
||||
provider = DataProviderFactory.create_provider(not parsed['use_api'])
|
||||
if provider.test_connection():
|
||||
provider_type = "API server" if parsed['use_api'] else "database"
|
||||
print(f"✅ Connection to MusicBrainz {provider_type} successful")
|
||||
return ExitCode.SUCCESS
|
||||
else:
|
||||
api = MusicBrainzAPIClient()
|
||||
if api.test_connection():
|
||||
print("✅ Connection to MusicBrainz API server successful")
|
||||
return ExitCode.SUCCESS
|
||||
else:
|
||||
print("❌ Connection to MusicBrainz API server failed")
|
||||
return ExitCode.ERROR
|
||||
provider_type = "API server" if parsed['use_api'] else "database"
|
||||
print(f"❌ Connection to MusicBrainz {provider_type} failed")
|
||||
return ExitCode.ERROR
|
||||
|
||||
# Check for test connection flag in any position
|
||||
if '--test-connection' in args:
|
||||
if use_database:
|
||||
db = MusicBrainzDatabase()
|
||||
if db.test_connection():
|
||||
print("✅ Connection to MusicBrainz database successful")
|
||||
return ExitCode.SUCCESS
|
||||
else:
|
||||
print("❌ Connection to MusicBrainz database failed")
|
||||
return ExitCode.ERROR
|
||||
else:
|
||||
api = MusicBrainzAPIClient()
|
||||
if api.test_connection():
|
||||
print("✅ Connection to MusicBrainz API server successful")
|
||||
return ExitCode.SUCCESS
|
||||
else:
|
||||
print("❌ Connection to MusicBrainz API server failed")
|
||||
return ExitCode.ERROR
|
||||
# Process songs (main functionality)
|
||||
source_file = Path(parsed['source'])
|
||||
output_success = Path(parsed['output_success']) if parsed['output_success'] else None
|
||||
output_failure = Path(parsed['output_failure']) if parsed['output_failure'] else None
|
||||
|
||||
# Validate input file
|
||||
if not args:
|
||||
print("Error: Input file is required")
|
||||
print("Use --help for usage information")
|
||||
if not source_file.exists():
|
||||
print(f"Error: Source file does not exist: {source_file}")
|
||||
return ExitCode.USAGE_ERROR
|
||||
|
||||
# Parse limit argument and remove it from args
|
||||
limit = None
|
||||
args_to_remove = []
|
||||
for i, arg in enumerate(args):
|
||||
if arg == '--limit':
|
||||
if i + 1 < len(args) and not args[i + 1].startswith('--'):
|
||||
try:
|
||||
limit = int(args[i + 1])
|
||||
if limit <= 0:
|
||||
print("Error: Limit must be a positive number")
|
||||
return ExitCode.USAGE_ERROR
|
||||
args_to_remove.extend([i, i + 1])
|
||||
except ValueError:
|
||||
print("Error: --limit requires a valid number")
|
||||
return ExitCode.USAGE_ERROR
|
||||
else:
|
||||
print("Error: --limit requires a number")
|
||||
return ExitCode.USAGE_ERROR
|
||||
|
||||
# Remove limit arguments and API flag from args
|
||||
for index in reversed(args_to_remove):
|
||||
args.pop(index)
|
||||
|
||||
# Remove API flag
|
||||
args = [arg for arg in args if arg != '--use-api']
|
||||
|
||||
# Filter out remaining flags to get file arguments
|
||||
file_args = [arg for arg in args if not arg.startswith('--')]
|
||||
|
||||
if not file_args:
|
||||
print("Error: Input file is required")
|
||||
print("Use --help for usage information")
|
||||
if not source_file.is_file():
|
||||
print(f"Error: Source path is not a file: {source_file}")
|
||||
return ExitCode.USAGE_ERROR
|
||||
|
||||
input_file = Path(file_args[0])
|
||||
output_file = Path(file_args[1]) if len(file_args) > 1 else None
|
||||
|
||||
if not input_file.exists():
|
||||
print(f"Error: Input file does not exist: {input_file}")
|
||||
return ExitCode.USAGE_ERROR
|
||||
|
||||
if not input_file.is_file():
|
||||
print(f"Error: Input path is not a file: {input_file}")
|
||||
return ExitCode.USAGE_ERROR
|
||||
|
||||
if input_file.suffix.lower() != '.json':
|
||||
print(f"Error: Input file must be a JSON file: {input_file}")
|
||||
if source_file.suffix.lower() != '.json':
|
||||
print(f"Error: Source file must be a JSON file: {source_file}")
|
||||
return ExitCode.USAGE_ERROR
|
||||
|
||||
# Process the file
|
||||
cleaner = MusicBrainzCleaner(use_database=use_database)
|
||||
result_path = cleaner.clean_songs_file(input_file, output_file, limit)
|
||||
cleaner = MusicBrainzCleaner(
|
||||
use_database=not parsed['use_api'],
|
||||
force=parsed['force'],
|
||||
no_sort=parsed['no_sort']
|
||||
)
|
||||
cleaner.process_songs(source_file, output_success, output_failure, parsed['limit'])
|
||||
|
||||
return ExitCode.SUCCESS
|
||||
|
||||
|
||||
36
src/core/factory.py
Normal file
36
src/core/factory.py
Normal file
@ -0,0 +1,36 @@
|
||||
"""
|
||||
Factory for creating MusicBrainz data providers.
|
||||
Provides a clean way to instantiate the appropriate data provider based on configuration.
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
from ..api.database import MusicBrainzDatabase
|
||||
from ..api.api_client import MusicBrainzAPIClient
|
||||
from .interfaces import MusicBrainzDataProvider
|
||||
from ..config.constants import DEFAULT_MUSICBRAINZ_URL
|
||||
|
||||
|
||||
class DataProviderFactory:
    """Builds the MusicBrainz data provider selected by the caller."""

    @staticmethod
    def create_provider(use_database: bool = True, base_url: str = DEFAULT_MUSICBRAINZ_URL) -> MusicBrainzDataProvider:
        """
        Return a ready-to-use data provider.

        Args:
            use_database: True to prefer the database provider, False to go
                straight to the HTTP API client.
            base_url: Base URL handed to the API client. It is used when the
                API is requested and also when the database connection fails.

        Returns:
            A MusicBrainzDatabase whose connect() call succeeded, otherwise a
            MusicBrainzAPIClient built with base_url.
        """
        # API explicitly requested: no database attempt at all.
        if not use_database:
            return MusicBrainzAPIClient(base_url)

        database = MusicBrainzDatabase()
        if database.connect():
            return database

        # The database was requested but is unreachable; degrade to the API.
        print("⚠️ Database connection failed, falling back to API")
        return MusicBrainzAPIClient(base_url)
|
||||
60
src/core/interfaces.py
Normal file
60
src/core/interfaces.py
Normal file
@ -0,0 +1,60 @@
|
||||
"""
|
||||
Core interfaces for MusicBrainz Data Cleaner.
|
||||
Defines the common protocol that all data access implementations must follow.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, Optional, Any, Tuple, List
|
||||
|
||||
|
||||
class MusicBrainzDataProvider(ABC):
|
||||
"""
|
||||
Abstract base class defining the interface for MusicBrainz data access.
|
||||
Both database and API implementations must implement these methods.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def test_connection(self) -> bool:
|
||||
"""Test connection to the data source."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def fuzzy_search_artist(self, artist_name: str) -> Optional[Tuple[str, str, float]]:
|
||||
"""
|
||||
Fuzzy search for artist by name.
|
||||
Returns (artist_name, mbid, similarity_score) or None.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def fuzzy_search_recording(self, title: str, artist_mbid: Optional[str] = None) -> Optional[Tuple[str, str, float]]:
|
||||
"""
|
||||
Fuzzy search for recording by title and optionally artist.
|
||||
Returns (recording_name, mbid, similarity_score) or None.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_artist_info(self, mbid: str) -> Optional[Dict[str, Any]]:
|
||||
"""Get detailed artist information by MBID."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_recording_info(self, mbid: str) -> Optional[Dict[str, Any]]:
|
||||
"""Get detailed recording information by MBID."""
|
||||
pass
|
||||
|
||||
|
||||
class SongProcessorInterface(ABC):
    """
    Contract for anything that cleans song records.

    Keeping this abstract lets different implementations be swapped while
    behaving the same way for callers.
    """

    @abstractmethod
    def clean_song(self, song: Dict[str, Any]) -> Tuple[Dict[str, Any], bool]:
        """
        Clean one song record.

        Returns:
            (cleaned_song, success_status)
        """
|
||||
280
src/core/song_processor.py
Normal file
280
src/core/song_processor.py
Normal file
@ -0,0 +1,280 @@
|
||||
"""
|
||||
Centralized song processing logic for MusicBrainz Data Cleaner.
|
||||
This module contains all song processing methods that should be shared
|
||||
between CLI and database interactions to ensure consistency.
|
||||
"""
|
||||
|
||||
from typing import Dict, Optional, Tuple, Any, List
|
||||
from ..utils.artist_title_processing import parse_complex_collaboration, generate_title_variations
|
||||
from ..utils.data_loader import data_loader
|
||||
from ..utils.artist_lookup import find_artist_fallback
|
||||
from .interfaces import MusicBrainzDataProvider, SongProcessorInterface
|
||||
|
||||
|
||||
class SongProcessor(SongProcessorInterface):
    """
    Centralized song processing with consistent logic across all interfaces.

    The processor resolves an artist MBID and a recording MBID for a song
    dictionary through the injected data provider and rewrites the song's
    'artist' / 'title' fields with the values the provider reports.
    """

    def __init__(self, data_provider: MusicBrainzDataProvider):
        """
        Initialize with a data provider that implements MusicBrainzDataProvider.

        Args:
            data_provider: Instance implementing MusicBrainzDataProvider interface
        """
        self.data_provider = data_provider
        # Maps an input artist string to the canonical name found in the
        # fallback lookup table; reset at the start of every clean_song call.
        self._lookup_canonical_names: Dict[str, str] = {}

    @staticmethod
    def _is_match(result: Any) -> bool:
        """Return True when a provider search result is a usable (name, mbid, ...) tuple."""
        return bool(result) and isinstance(result, tuple) and len(result) >= 2

    def find_artist_mbid(self, artist_name: str, recording_title: Optional[str] = None) -> Optional[str]:
        """
        Find artist MBID using consistent logic across all interfaces.

        Search order: recording-aware search (only when the provider offers
        `fuzzy_search_artist_with_recording`), plain fuzzy search, then the
        artist lookup table.

        Args:
            artist_name: Name of the artist to search for
            recording_title: Optional recording title to help choose the correct artist

        Returns:
            Artist MBID if found, None otherwise
        """
        if not artist_name:
            return None

        # Recording-aware search is an optional provider capability.
        if recording_title and hasattr(self.data_provider, 'fuzzy_search_artist_with_recording'):
            result = self.data_provider.fuzzy_search_artist_with_recording(artist_name, recording_title)
            if self._is_match(result):
                return result[1]  # (artist_name, mbid, score)

        # Fallback to regular search
        result = self.data_provider.fuzzy_search_artist(artist_name)
        if self._is_match(result):
            return result[1]  # (artist_name, mbid, score)

        # Final fallback: check the artist lookup table
        lookup_result = find_artist_fallback(artist_name, min_score=0.8)
        if lookup_result:
            print(f"🎯 Lookup table match found: {artist_name} → {lookup_result['canonical_name']} (score: {lookup_result['score']:.2f})")
            # Remember the canonical name so clean_song can prefer it over
            # the provider's artist name.
            self._lookup_canonical_names[artist_name] = lookup_result['canonical_name']
            return lookup_result['mbid']

        return None

    def find_recording_mbid(self, artist_mbid: Optional[str], title: str, original_artist: Optional[str] = None) -> Optional[str]:
        """
        Find recording MBID using consistent logic across all interfaces.

        Search order:
          1. known-recordings table (exact, case-insensitive title match),
          2. provider search scoped to `artist_mbid`,
          3. provider search scoped to each of the top 5 artist candidates
             (only when the provider offers `fuzzy_search_artist_candidates`),
          4. for collaborations (no `artist_mbid`), title-only search whose
             artist credit is checked against `original_artist`,
          5. unverified title-only search when no `artist_mbid` is available.

        Args:
            artist_mbid: Artist MBID (None for collaborations)
            title: Song title
            original_artist: Original artist string (for collaboration verification)

        Returns:
            Recording MBID if found, None otherwise
        """
        if not title:
            return None

        # Use the shared title variations logic for comprehensive title cleaning
        title_variations = generate_title_variations(title)

        # 1. Known recordings: first table entry for this artist whose title
        #    equals any variation (case-insensitive).
        lowered_variations = {variation.lower() for variation in title_variations}
        known_recordings = data_loader.load_known_recordings()
        for (known_artist_mbid, known_title), recording_mbid in known_recordings.items():
            if artist_mbid == known_artist_mbid and known_title.lower() in lowered_variations:
                return recording_mbid

        # 2. Recording search scoped to the specific artist MBID
        if artist_mbid:
            for variation in title_variations:
                result = self.data_provider.fuzzy_search_recording(variation, artist_mbid)
                if self._is_match(result):
                    return result[1]  # (recording_name, mbid, score)

        # 3. No artist_mbid or recording not found: try multiple artist candidates
        if original_artist and hasattr(self.data_provider, 'fuzzy_search_artist_candidates'):
            print(f"🎯 Trying multiple artist candidates for '{original_artist}' with recording '{title}'...")

            # A provider returning None instead of an empty list must not crash the slice.
            candidates = self.data_provider.fuzzy_search_artist_candidates(original_artist, title) or []

            for i, candidate in enumerate(candidates[:5]):  # Limit to top 5 candidates
                candidate_mbid = candidate['gid']
                candidate_name = candidate['name']
                candidate_score = candidate['score']
                has_recording = candidate.get('has_recording', False)

                print(f" 🔍 Trying candidate {i+1}: {candidate_name} (score: {candidate_score:.2f}, has recording: {has_recording})")

                for variation in title_variations:
                    result = self.data_provider.fuzzy_search_recording(variation, candidate_mbid)
                    if self._is_match(result):
                        print(f" ✅ Found recording with candidate {i+1}: {candidate_name}")
                        return result[1]

        # 4. Collaboration case: search by title only and verify the artist credit
        if not artist_mbid and original_artist:
            # Collapse whitespace after substituting separators: a plain
            # replace(',', ' & ') turns "A, B" into "A &  B" (double space),
            # which can never equal a credit such as "A & B".
            expected_artist_string = ' '.join(
                original_artist.replace(',', ' & ').replace(' and ', ' & ').split()
            ).lower()

            for variation in title_variations:
                result = self.data_provider.fuzzy_search_recording(variation)
                if not self._is_match(result):
                    continue

                recording_mbid = result[1]
                recording_info = self.get_recording_info(recording_mbid)
                artist_credit = recording_info.get('artist_credit') if recording_info else None
                if not artist_credit:
                    continue

                recording_artists = artist_credit.lower()

                # Exact match on the normalized collaboration string
                if ' '.join(recording_artists.split()) == expected_artist_string:
                    return recording_mbid

                # Partial match: main artist plus at least one collaborator
                main_artist, collaborators = parse_complex_collaboration(original_artist)
                if main_artist.lower() in recording_artists and any(
                    collaborator.lower() in recording_artists for collaborator in collaborators
                ):
                    return recording_mbid

        # 5. Last resort. When artist_mbid is set, the identical artist-scoped
        #    search already ran in step 2, so it is not repeated (this assumes
        #    the provider returns the same answer for the same query).
        # NOTE(review): without an artist_mbid this is a title-only search whose
        # artist credit is NOT verified; the nesting of this fallback in the
        # original file could not be confirmed - verify it is intended.
        if not artist_mbid:
            for variation in title_variations:
                result = self.data_provider.fuzzy_search_recording(variation, artist_mbid)
                if self._is_match(result):
                    return result[1]  # (recording_name, mbid, score)

        return None

    def get_artist_info(self, mbid: str) -> Optional[Dict[str, Any]]:
        """Get artist info by delegating to the data provider."""
        return self.data_provider.get_artist_info(mbid)

    def get_recording_info(self, mbid: str) -> Optional[Dict[str, Any]]:
        """Get recording info by delegating to the data provider."""
        return self.data_provider.get_recording_info(mbid)

    def _build_artist_string(self, artist_credit: list) -> str:
        """
        Build artist string from artist credit list (API format).

        Each entry contributes its 'name' followed by its 'joinphrase' (when
        present and non-empty); the concatenation is stripped of surrounding
        whitespace. An empty or missing list yields "".
        """
        if not artist_credit:
            return ""
        return ''.join(
            credit['name'] + (credit.get('joinphrase') or '')
            for credit in artist_credit
        ).strip()

    def _apply_artist_credit(self, song: Dict[str, Any], recording_info: Dict[str, Any]) -> bool:
        """
        Copy the recording's artist credit into song['artist'].

        Handles both the pre-joined 'artist_credit' string and the
        list-shaped 'artist-credit' value. Returns True when song['artist']
        was updated.
        """
        if recording_info.get('artist_credit'):
            song['artist'] = recording_info['artist_credit']
            return True
        if recording_info.get('artist-credit'):
            artist_string = self._build_artist_string(recording_info['artist-credit'])
            if artist_string:
                song['artist'] = artist_string
                return True
        return False

    def clean_song(self, song: Dict[str, Any]) -> Tuple[Dict[str, Any], bool]:
        """
        Clean a single song using consistent logic across all interfaces.

        The song dictionary is updated in place and also returned.

        Args:
            song: Song dictionary with 'artist' and 'title' keys

        Returns:
            Tuple of (cleaned_song, success_status). success_status is True
            only when a recording was found and its info could be loaded.
        """
        # Clear lookup cache for this song
        self._lookup_canonical_names = {}

        original_artist = song.get('artist', '')
        title = song.get('title', '')

        # Find artist MBID (pass recording title to help choose correct artist)
        artist_mbid = self.find_artist_mbid(original_artist, title)

        # Use the same complex collaboration parsing as the database
        main_artist, collaborators = parse_complex_collaboration(original_artist)
        has_collaboration = len(collaborators) > 0

        # Collaboration case: no single artist MBID could be resolved
        if artist_mbid is None and has_collaboration:
            recording_mbid = self.find_recording_mbid(None, title, original_artist)
            if recording_mbid:
                recording_info = self.get_recording_info(recording_mbid)
                if recording_info:
                    # Update with the correct artist credit and title
                    self._apply_artist_credit(song, recording_info)
                    song['title'] = recording_info['title']
                    song['recording_mbid'] = recording_mbid

                    # For collaborations, try to get the main artist's MBID
                    # from the (possibly updated) artist string.
                    main_artist, collaborators = parse_complex_collaboration(song.get('artist', ''))
                    if main_artist:
                        artist_result = self.data_provider.fuzzy_search_artist(main_artist)
                        if self._is_match(artist_result):
                            song['mbid'] = artist_result[1]  # Set the main artist's MBID

                    return song, True
            return song, False

        # Regular case (non-collaboration or collaboration not found)
        if not artist_mbid:
            return song, False

        artist_info = self.get_artist_info(artist_mbid)
        if artist_info:
            if original_artist in self._lookup_canonical_names:
                # Use the canonical name from lookup table instead of database name
                song['artist'] = self._lookup_canonical_names[original_artist]
                print(f" 🔄 Updated artist name: {original_artist} → {song['artist']}")
            else:
                # Use the database name
                song['artist'] = artist_info['name']
            song['mbid'] = artist_mbid

        # Find recording MBID (pass original artist for multiple candidate search)
        recording_mbid = self.find_recording_mbid(artist_mbid, title, original_artist)
        if not recording_mbid:
            return song, False

        recording_info = self.get_recording_info(recording_mbid)
        if recording_info:
            # Update artist string if there are multiple artists, but keep the
            # artist MBID that was resolved first.
            if self._apply_artist_credit(song, recording_info) and 'mbid' not in song:
                song['mbid'] = artist_mbid

            song['title'] = recording_info['title']
            song['recording_mbid'] = recording_mbid
            return song, True

        return song, False
|
||||
@ -10,9 +10,24 @@ src/tests/
|
||||
├── integration/ # Integration tests for database and API
|
||||
├── debug/ # Debug scripts and troubleshooting tests
|
||||
├── run_tests.py # Test runner script
|
||||
└── README.md # This file
|
||||
├── README.md # This file
|
||||
├── legacy/ # Legacy scripts moved from root directory
|
||||
└── moved/ # Test files moved from root directory
|
||||
```
|
||||
|
||||
### Legacy Scripts (Moved from Root)
|
||||
- `process_full_dataset.py` - Legacy script that redirects to the new CLI
|
||||
- `musicbrainz_cleaner.py` - Legacy entry point script
|
||||
|
||||
### Moved Test Files (Moved from Root)
|
||||
- `test_title_cleaning.py` - Test title cleaning functionality
|
||||
- `test_simple_query.py` - Test simple database queries
|
||||
- `debug_artist_search.py` - Debug artist search functionality
|
||||
- `test_failed_collaborations.py` - Test failed collaboration cases
|
||||
- `test_collaboration_debug.py` - Debug collaboration parsing
|
||||
- `test_100_random.py` - Test 100 random songs
|
||||
- `quick_test_20.py` - Quick test with 20 songs
|
||||
|
||||
## 🧪 Test Categories
|
||||
|
||||
### Unit Tests (`unit/`)
|
||||
@ -44,6 +59,39 @@ src/tests/
|
||||
python3 src/tests/run_tests.py
|
||||
```
|
||||
|
||||
### Running Moved Test Files
|
||||
|
||||
The following test files were moved from the root directory to `src/tests/`:
|
||||
|
||||
```bash
|
||||
# Run individual moved test files
|
||||
python3 src/tests/test_100_random.py
|
||||
python3 src/tests/quick_test_20.py
|
||||
python3 src/tests/test_title_cleaning.py
|
||||
python3 src/tests/test_simple_query.py
|
||||
python3 src/tests/debug_artist_search.py
|
||||
python3 src/tests/test_failed_collaborations.py
|
||||
python3 src/tests/test_collaboration_debug.py
|
||||
```
|
||||
|
||||
### Running Legacy Scripts
|
||||
|
||||
Legacy scripts that redirect to the new CLI:
|
||||
|
||||
```bash
|
||||
# Legacy full dataset processing (redirects to CLI)
|
||||
python3 src/tests/process_full_dataset.py
|
||||
|
||||
# Legacy entry point (redirects to CLI)
|
||||
python3 src/tests/musicbrainz_cleaner.py
|
||||
```
|
||||
|
||||
**Note**: These legacy scripts are kept for backward compatibility but the new CLI is preferred:
|
||||
```bash
|
||||
# Preferred method (new CLI)
|
||||
docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main
|
||||
```
|
||||
|
||||
### Run Specific Test Categories
|
||||
```bash
|
||||
# Run only unit tests
|
||||
|
||||
40
src/tests/debug_artist_search.py
Normal file
40
src/tests/debug_artist_search.py
Normal file
@ -0,0 +1,40 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Debug script for individual artist searches
|
||||
"""
|
||||
|
||||
import sys
|
||||
sys.path.insert(0, '/app')
|
||||
|
||||
from src.api.database import MusicBrainzDatabase
|
||||
|
||||
def test_artist_search():
    """Run a fuzzy artist search for a few sample names and print each outcome."""
    database = MusicBrainzDatabase()

    # Main artists taken from the collaboration strings under investigation
    sample_artists = (
        "Kanye",  # From "ft Jamie Foxx West, Kanye"
        "Kanye West",  # What it should be
        "Ariana Grande",  # From "Ariana Grande, Normani, Nicki Minaj"
        "SZA",  # From "SZA, Justin Bieber"
    )

    print("🔍 Testing Individual Artist Searches")
    print("=" * 50)

    for artist_name in sample_artists:
        print(f"\n📝 Searching for: '{artist_name}'")

        try:
            result = database.fuzzy_search_artist(artist_name)
            if not result:
                print(f" ❌ Not found")
                continue

            # The result is unpacked as (matched_name, mbid, score).
            matched_name, mbid, score = result
            print(f" ✅ Found: '{matched_name}' (MBID: {mbid}, Score: {score})")
        except Exception as e:
            # Report the failure and keep going with the remaining names.
            print(f" ❌ Error: {e}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_artist_search()
|
||||
42
src/tests/process_full_dataset.py
Normal file
42
src/tests/process_full_dataset.py
Normal file
@ -0,0 +1,42 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Legacy script for full dataset processing.
|
||||
This script now redirects to the new CLI command for better functionality.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
def main():
    """
    Forward this legacy entry point to the new CLI via docker-compose.

    Any command-line arguments given to this script are appended to the
    forwarded command. The process exits with the forwarded command's own
    exit status when that command fails, and with 1 when docker-compose
    cannot be started at all.
    """
    print("🔄 Redirecting to new CLI command...")
    print("📝 Use: docker-compose run --rm musicbrainz-cleaner python3 -m src.cli.main --process-full-dataset")
    print()

    # The forwarded command runs from the directory containing this script.
    script_dir = Path(__file__).parent

    # Build the new command, followed by any extra arguments from the caller.
    cmd = [
        "docker-compose", "run", "--rm", "musicbrainz-cleaner",
        "python3", "-m", "src.cli.main", "--process-full-dataset",
        *sys.argv[1:],
    ]

    try:
        subprocess.run(cmd, cwd=script_dir, check=True)
    except subprocess.CalledProcessError as e:
        print(f"❌ Error running command: {e}")
        # Propagate the child's exit status instead of flattening it to 1,
        # so callers can tell the different failure modes apart.
        sys.exit(e.returncode)
    except FileNotFoundError:
        # Raised when the docker-compose executable itself cannot be found.
        print("❌ Error: docker-compose not found. Make sure Docker is running.")
        print("💡 Alternative: Run directly with:")
        print(" python3 -m src.cli.main --process-full-dataset")
        sys.exit(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
108
src/tests/quick_test_20.py
Normal file
108
src/tests/quick_test_20.py
Normal file
@ -0,0 +1,108 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Quick test script for 20 random songs
|
||||
Simple single-threaded approach
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
# Add the src directory to the path
|
||||
sys.path.insert(0, '/app')
|
||||
from src.cli.main import MusicBrainzCleaner
|
||||
|
||||
def main():
    """
    Clean a random sample of up to 20 songs from data/songs.json and print statistics.

    Songs are processed one at a time with MusicBrainzCleaner. A song counts
    as failed unless both an artist MBID ('mbid') and a recording MBID
    ('recording_mbid') are present in the cleaned result. Artist and
    recording hit counts are tracked independently of each other.
    """
    import random

    requested_sample_size = 20

    print('🚀 Starting quick test with 20 random songs...')

    # Load songs
    input_file = Path('data/songs.json')
    if not input_file.exists():
        print('❌ songs.json not found')
        return

    # Song metadata contains non-ASCII characters; do not depend on the
    # platform's default encoding.
    with open(input_file, 'r', encoding='utf-8') as f:
        all_songs = json.load(f)

    print(f'📊 Total songs available: {len(all_songs):,}')

    # random.sample raises ValueError when asked for more items than exist,
    # so shrink the sample for small datasets.
    sample_size = min(requested_sample_size, len(all_songs))
    if sample_size == 0:
        print('❌ songs.json contains no songs')
        return

    sample_songs = random.sample(all_songs, sample_size)
    print(f'🎯 Testing {sample_size} random songs...')

    # Initialize cleaner
    cleaner = MusicBrainzCleaner()

    # Process songs
    found_artists = 0
    found_recordings = 0
    failed_songs = []

    start_time = time.time()

    for i, song in enumerate(sample_songs, 1):
        print(f' [{i:2d}/{sample_size}] Processing: "{song.get("artist", "Unknown")}" - "{song.get("title", "Unknown")}"')

        try:
            result = cleaner.clean_song(song)
            # NOTE(review): SongProcessorInterface.clean_song is declared to
            # return (cleaned_song, success). If MusicBrainzCleaner follows
            # that contract, the membership checks below need the dict, not
            # the tuple - confirm against MusicBrainzCleaner.clean_song.
            if isinstance(result, tuple):
                result = result[0]

            artist_found = 'mbid' in result
            recording_found = 'recording_mbid' in result

            # Count each hit on its own so "Artists found" also includes
            # songs whose recording lookup failed.
            if artist_found:
                found_artists += 1
            if recording_found:
                found_recordings += 1

            if artist_found and recording_found:
                print(f' ✅ Found both artist and recording')
            else:
                failed_songs.append({
                    'original': song,
                    'cleaned': result,
                    'artist_found': artist_found,
                    'recording_found': recording_found,
                    'artist_name': song.get('artist', 'Unknown'),
                    'title': song.get('title', 'Unknown')
                })
                print(f' ❌ Artist: {artist_found}, Recording: {recording_found}')

        except Exception as e:
            print(f' 💥 Error: {e}')
            failed_songs.append({
                'original': song,
                'cleaned': {'error': str(e)},
                'artist_found': False,
                'recording_found': False,
                'artist_name': song.get('artist', 'Unknown'),
                'title': song.get('title', 'Unknown'),
                'error': str(e)
            })

    processing_time = time.time() - start_time

    # Calculate success rates relative to the number of songs actually tested
    artist_success_rate = found_artists / sample_size * 100
    recording_success_rate = found_recordings / sample_size * 100
    failed_rate = len(failed_songs) / sample_size * 100
    # Guard against a zero elapsed time on coarse clocks.
    songs_per_second = sample_size / processing_time if processing_time > 0 else float('inf')

    print(f'\n📊 Final Results:')
    print(f' ⏱️ Processing time: {processing_time:.2f} seconds')
    print(f' 🚀 Speed: {songs_per_second:.1f} songs/second')
    print(f' ✅ Artists found: {found_artists}/{sample_size} ({artist_success_rate:.1f}%)')
    print(f' ✅ Recordings found: {found_recordings}/{sample_size} ({recording_success_rate:.1f}%)')
    print(f' ❌ Failed songs: {len(failed_songs)} ({failed_rate:.1f}%)')

    # Show failed songs
    if failed_songs:
        print(f'\n🔍 Failed songs:')
        for i, failed in enumerate(failed_songs, 1):
            print(f' [{i}] "{failed["artist_name"]}" - "{failed["title"]}"')
            print(f' Artist found: {failed["artist_found"]}, Recording found: {failed["recording_found"]}')
            if 'error' in failed:
                print(f' Error: {failed["error"]}')
    else:
        print('\n🎉 All songs processed successfully!')
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
1
src/tests/test-current-failure.json
Normal file
1
src/tests/test-current-failure.json
Normal file
@ -0,0 +1 @@
|
||||
[]
|
||||
@ -15,7 +15,8 @@
|
||||
"favorite": false,
|
||||
"guid": "f9fd52fc-0c5a-01d5-caf5-4476e9172401",
|
||||
"path": "z://MP4\\Afrojack and Chris Brown - As Your Friend.mp4",
|
||||
"title": "As Your Friend",
|
||||
"title": "As Your Friend (Leroy Styles remix)",
|
||||
"recording_mbid": "8abb8690-716e-420a-bd3d-c7d2b21c7b78",
|
||||
"mbid": "a3ee920f-4e7f-4993-8aca-4b8538cfaa4a"
|
||||
},
|
||||
{
|
||||
@ -24,8 +25,9 @@
|
||||
"favorite": false,
|
||||
"guid": "ff27874e-68c3-2c0b-d302-a4bf36d2f76c",
|
||||
"path": "z://MP4\\Andy Grammer - Honey I Am Good.mp4",
|
||||
"title": "Honey I Am Good",
|
||||
"mbid": "9e60ea29-9607-4f7d-aa96-2092ef41f0d3"
|
||||
"title": "Honey I’m Good",
|
||||
"mbid": "9e60ea29-9607-4f7d-aa96-2092ef41f0d3",
|
||||
"recording_mbid": "96bb3f30-8b52-42d3-a8f9-bb2b2495e55a"
|
||||
},
|
||||
{
|
||||
"artist": "Avicii",
|
||||
@ -33,7 +35,8 @@
|
||||
"favorite": false,
|
||||
"guid": "9df89f14-a568-14a3-5081-77d6ee3d5cf8",
|
||||
"path": "z://MP4\\Avicii and Nicky Romero - I Could Be The One.mp4",
|
||||
"title": "I Could Be The One",
|
||||
"title": "Hey Brother / You Make Me / I Could Be the One",
|
||||
"recording_mbid": "3f63aa64-abc1-45bb-915a-5701bf83622b",
|
||||
"mbid": "c85cfd6b-b1e9-4a50-bd55-eb725f04f7d5"
|
||||
},
|
||||
{
|
||||
@ -42,7 +45,8 @@
|
||||
"favorite": false,
|
||||
"guid": "71b6e7e2-3ad5-1b46-b4b5-d33b961451f7",
|
||||
"path": "z://MP4\\Bastille - Flaws.mp4",
|
||||
"title": "Flaws",
|
||||
"mbid": "7808accb-6395-4b25-858c-678bbb73896b"
|
||||
"title": "Flaws (Cinematic's in My Soul remix)",
|
||||
"mbid": "7808accb-6395-4b25-858c-678bbb73896b",
|
||||
"recording_mbid": "44e2adef-8088-41f2-913b-10dc67ef3185"
|
||||
}
|
||||
]
|
||||
54
src/tests/test-failure.json
Normal file
54
src/tests/test-failure.json
Normal file
@ -0,0 +1,54 @@
|
||||
[
|
||||
{
|
||||
"artist": "Kristen Bell, Idina Menzel",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "0e0e5999-0c63-c817-84ad-7e27aea0cc54",
|
||||
"path": "z://MP4\\Kristen Bell, Idina Menzel - For The First Time In Forever.mp4",
|
||||
"title": "For The First Time In Forever"
|
||||
},
|
||||
{
|
||||
"artist": "Lady Gaga",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "93aca149-788e-710a-5d17-14080727c27d",
|
||||
"path": "z://MP4\\Lady Gaga - I'll Never Love Again (Karaoke Instrumental) A Star Is Born.mp4",
|
||||
"title": "I'll Never Love Again (A Star is Born)",
|
||||
"mbid": "650e7db6-b795-4eb5-a702-5ea2fc46c848"
|
||||
},
|
||||
{
|
||||
"artist": "Lady Gaga, Bradley Cooper",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "5e21d904-5cfb-54bc-1165-f47e0634afe0",
|
||||
"path": "z://MP4\\Lady Gaga, Bradley Cooper - Shallow (Karaoke Instrumental) A Star Is Born.mp4",
|
||||
"title": "Shallow"
|
||||
},
|
||||
{
|
||||
"artist": "Len",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "9f29d6a7-a31d-d02b-8954-2a3af1a124dc",
|
||||
"path": "z://MP4\\Len - Steal My Sunshine.mp4",
|
||||
"title": "Steal My Sunshine",
|
||||
"mbid": "cb53fc63-8a50-4320-a3db-ac55b0f115a7"
|
||||
},
|
||||
{
|
||||
"artist": "Machine Gun Kelly",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "95690600-55b6-af32-afec-d8d48338f11f",
|
||||
"path": "z://MP4\\Machine Gun Kelly - Bloody Valentine (Karaoke Instrumental).mp4",
|
||||
"title": "Bloody Valentine",
|
||||
"mbid": "8d9315a4-ee01-427f-92ae-53162e008c51"
|
||||
},
|
||||
{
|
||||
"artist": "Zedd, Maren Morris & Grey",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "138120ce-0716-94a8-a6ab-fcfb24f59c33",
|
||||
"path": "z://MP4\\Zedd, Maren Morris, Grey - The Middle (Karaoke).mp4",
|
||||
"title": "The Middle",
|
||||
"recording_mbid": "b9b346ff-702c-46f2-8072-34ab349bf39d"
|
||||
}
|
||||
]
|
||||
12
src/tests/test-success.json
Normal file
12
src/tests/test-success.json
Normal file
@ -0,0 +1,12 @@
|
||||
[
|
||||
{
|
||||
"artist": "Juice WRLD & Marshmello",
|
||||
"disabled": false,
|
||||
"favorite": false,
|
||||
"guid": "24bbbfd4-3ee5-0661-9f77-5a27ff024656",
|
||||
"path": "z://MP4\\Come & Go - Juice WRLD & Marshmello (Karaoke Instrumental).mp4",
|
||||
"title": "Come & Go",
|
||||
"mbid": "4e4ebde4-0c56-4dec-844b-6c73adcdd92d",
|
||||
"recording_mbid": "848db1a2-bdb2-4190-921c-2c06989e9b8c"
|
||||
}
|
||||
]
|
||||
133
src/tests/test_100_random.py
Normal file
133
src/tests/test_100_random.py
Normal file
@ -0,0 +1,133 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script for 100 random songs
|
||||
Simple single-threaded approach
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
# Add the src directory to the path
|
||||
sys.path.insert(0, '/app')
|
||||
from src.cli.main import MusicBrainzCleaner
|
||||
|
||||
def main():
    """Run the cleaner over a random sample of songs and report match rates.

    Loads ``data/songs.json``, samples up to 100 entries, passes each one to
    ``MusicBrainzCleaner.clean_song`` and prints per-song and aggregate
    results. A song counts as failed unless both ``mbid`` and
    ``recording_mbid`` are present in the cleaned result. When at least one
    song fails, a JSON report is written to ``data/test_100_results.json``.
    """
    import random

    requested_sample_size = 100
    print('🚀 Starting test with 100 random songs...')

    # Load songs
    input_file = Path('data/songs.json')
    if not input_file.exists():
        print('❌ songs.json not found')
        return

    # Song metadata contains non-ASCII characters; do not depend on the
    # platform default encoding.
    with open(input_file, 'r', encoding='utf-8') as f:
        all_songs = json.load(f)

    print(f'📊 Total songs available: {len(all_songs):,}')

    if not all_songs:
        print('❌ songs.json contains no songs')
        return

    # random.sample raises ValueError when asked for more items than exist,
    # so cap the sample at the size of the library.
    sample_size = min(requested_sample_size, len(all_songs))
    sample_songs = random.sample(all_songs, sample_size)
    print(f'🎯 Testing {sample_size} random songs...')

    # Initialize cleaner
    cleaner = MusicBrainzCleaner()

    # Process songs
    found_artists = 0
    found_recordings = 0
    failed_songs = []

    start_time = time.time()

    for i, song in enumerate(sample_songs, 1):
        print(f' [{i:3d}/{sample_size}] Processing: "{song.get("artist", "Unknown")}" - "{song.get("title", "Unknown")}"')

        try:
            # clean_song returns a tuple: (song_dict, success_boolean)
            cleaned_song, success = cleaner.clean_song(song)

            artist_found = 'mbid' in cleaned_song
            recording_found = 'recording_mbid' in cleaned_song

            # Count the two lookups independently so that the artist rate is
            # not silently tied to the recording rate.
            if artist_found:
                found_artists += 1
            if recording_found:
                found_recordings += 1

            if artist_found and recording_found:
                print(f' ✅ Found both artist and recording')
            else:
                failed_songs.append({
                    'original': song,
                    'cleaned': cleaned_song,
                    'success': success,
                    'artist_found': artist_found,
                    'recording_found': recording_found,
                    'artist_name': song.get('artist', 'Unknown'),
                    'title': song.get('title', 'Unknown')
                })
                print(f' ❌ Artist: {artist_found}, Recording: {recording_found}')

        except Exception as e:
            print(f' 💥 Error: {e}')
            failed_songs.append({
                'original': song,
                'cleaned': {'error': str(e)},
                'artist_found': False,
                'recording_found': False,
                'artist_name': song.get('artist', 'Unknown'),
                'title': song.get('title', 'Unknown'),
                'error': str(e)
            })

    end_time = time.time()
    processing_time = end_time - start_time

    # Calculate success rates against the number of songs actually tested
    artist_success_rate = found_artists / sample_size * 100
    recording_success_rate = found_recordings / sample_size * 100
    failed_rate = len(failed_songs) / sample_size * 100
    # Guard against a zero elapsed time on coarse clocks.
    songs_per_second = sample_size / processing_time if processing_time > 0 else 0.0

    print(f'\n📊 Final Results:')
    print(f' ⏱️ Processing time: {processing_time:.2f} seconds')
    print(f' 🚀 Speed: {songs_per_second:.1f} songs/second')
    print(f' ✅ Artists found: {found_artists}/{sample_size} ({artist_success_rate:.1f}%)')
    print(f' ✅ Recordings found: {found_recordings}/{sample_size} ({recording_success_rate:.1f}%)')
    print(f' ❌ Failed songs: {len(failed_songs)} ({failed_rate:.1f}%)')

    # Save detailed report
    if failed_songs:
        report_file = 'data/test_100_results.json'
        report_data = {
            'test_summary': {
                'total_tested': sample_size,
                'artists_found': found_artists,
                'recordings_found': found_recordings,
                'failed_count': len(failed_songs),
                'artist_success_rate': artist_success_rate,
                'recording_success_rate': recording_success_rate,
                'processing_time_seconds': processing_time,
                'songs_per_second': songs_per_second
            },
            'failed_songs': failed_songs
        }

        with open(report_file, 'w') as f:
            json.dump(report_data, f, indent=2)

        print(f'\n📄 Detailed report saved to: {report_file}')

        # Show examples of failed songs
        print(f'\n🔍 Examples of failed songs:')
        for i, failed in enumerate(failed_songs[:10], 1):
            print(f' [{i}] "{failed["artist_name"]}" - "{failed["title"]}"')
            print(f' Artist found: {failed["artist_found"]}, Recording found: {failed["recording_found"]}')
            if 'error' in failed:
                print(f' Error: {failed["error"]}')
    else:
        print('\n🎉 All songs processed successfully!')
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
43
src/tests/test_collaboration_debug.py
Normal file
43
src/tests/test_collaboration_debug.py
Normal file
@ -0,0 +1,43 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Debug script for collaboration parsing issues
|
||||
"""
|
||||
|
||||
import sys
|
||||
sys.path.insert(0, '/app')
|
||||
|
||||
from src.api.database import MusicBrainzDatabase
|
||||
|
||||
def test_collaboration_parsing():
    """Print how ``MusicBrainzDatabase`` splits a set of tricky artist strings.

    Each sample is passed to ``_parse_complex_collaboration``; the main
    artist, the collaborator list and whether any collaborator was detected
    are printed. Exceptions are reported per sample and do not stop the run.
    """
    database = MusicBrainzDatabase()

    # Artist strings taken from previously failed songs
    samples = (
        "ft Jamie Foxx West, Kanye",
        "Ariana Grande, Normani, Nicki Minaj",
        "SZA, Justin Bieber",
        "Will.I.Am ft. Britney Spears",  # This should work now
        "Florida Georgia Line Ft Luke Bryan",  # This should work now
    )

    print("🔍 Testing Collaboration Parsing")
    print("=" * 50)

    for raw_artist in samples:
        print(f"\n📝 Input: '{raw_artist}'")

        try:
            lead, featured = database._parse_complex_collaboration(raw_artist)
            print(f" ✅ Main artist: '{lead}'")
            print(f" ✅ Collaborators: {featured}")

            verdict = " 🎯 Parsed as collaboration" if featured else " ❌ Not detected as collaboration"
            print(verdict)
        except Exception as e:
            print(f" ❌ Error: {e}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_collaboration_parsing()
|
||||
62
src/tests/test_failed_collaborations.py
Normal file
62
src/tests/test_failed_collaborations.py
Normal file
@ -0,0 +1,62 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script for the specific failed collaboration songs
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
# Add the src directory to the path
|
||||
sys.path.insert(0, '/app')
|
||||
from src.cli.main import MusicBrainzCleaner
|
||||
|
||||
def main():
    """Re-run the cleaner on three collaboration songs that previously failed.

    For every song the outcome is printed: both artist and recording found,
    only the artist found, or nothing found. Exceptions raised while cleaning
    a song are printed and the loop continues with the next song.
    """
    print('🔍 Testing Failed Collaboration Songs...')

    # Create test songs from the failed ones
    failed_songs = [
        dict(artist=artist, title=title)
        for artist, title in (
            ("ft Jamie Foxx West, Kanye", "Gold Digger"),
            ("Ariana Grande, Normani, Nicki Minaj", "Bad To You"),
            ("SZA, Justin Bieber", "Snooze (Acoustic)"),
        )
    ]

    # Initialize cleaner
    cleaner = MusicBrainzCleaner()

    total = len(failed_songs)
    print(f'🎯 Testing {total} collaboration songs...')

    for position, entry in enumerate(failed_songs, 1):
        print(f'\n [{position}/{total}] Processing: "{entry["artist"]}" - "{entry["title"]}"')

        try:
            # clean_song returns (song_dict, success_boolean); the flag is unused here
            cleaned, _succeeded = cleaner.clean_song(entry)

            has_artist = 'mbid' in cleaned
            has_recording = 'recording_mbid' in cleaned

            if not has_artist:
                print(' ❌ Neither artist nor recording found')
                continue

            if has_recording:
                print(' ✅ Found both artist and recording')
                print(f' 🎯 Artist: {cleaned.get("artist", "Unknown")} (MBID: {cleaned.get("mbid", "None")})')
                print(f' 🎯 Recording: {cleaned.get("recording_mbid", "None")}')
            else:
                print(' ⚠️ Artist found but recording not found')
                print(f' 🎯 Artist: {cleaned.get("artist", "Unknown")} (MBID: {cleaned.get("mbid", "None")})')

        except Exception as e:
            print(f' ❌ Error: {e}')
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
48
src/tests/test_simple_query.py
Normal file
48
src/tests/test_simple_query.py
Normal file
@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Simple database query test
|
||||
"""
|
||||
|
||||
import sys
|
||||
sys.path.insert(0, '/app')
|
||||
|
||||
from src.api.database import MusicBrainzDatabase
|
||||
|
||||
def test_simple_query():
    """Smoke-test the database connection with a count and two name lookups.

    Connects through ``MusicBrainzDatabase``, prints the number of rows in
    ``artist`` and then looks up 'Ariana Grande' and 'SZA' by exact name.
    Any exception is printed together with its traceback.
    """
    db = MusicBrainzDatabase()

    connected = db.connect()
    if not connected:
        print("❌ Could not connect to database")
        return

    print("✅ Connected to database")

    try:
        # Test a simple query
        db.cursor.execute("SELECT COUNT(*) FROM artist")
        count = db.cursor.fetchone()
        print(f"📊 Total artists in database: {count['count']:,}")

        # Test specific artist queries, one exact-name lookup per artist
        for wanted in ('Ariana Grande', 'SZA'):
            db.cursor.execute("SELECT name, gid FROM artist WHERE name = %s LIMIT 1", (wanted,))
            row = db.cursor.fetchone()
            if not row:
                print(f"❌ {wanted} not found")
            else:
                print(f"✅ Found {wanted}: {row['name']} (MBID: {row['gid']})")

    except Exception as e:
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_simple_query()
|
||||
50
src/tests/test_title_cleaning.py
Normal file
50
src/tests/test_title_cleaning.py
Normal file
@ -0,0 +1,50 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script for improved title cleaning
|
||||
"""
|
||||
|
||||
import sys
|
||||
sys.path.insert(0, '/app')
|
||||
|
||||
from src.api.database import MusicBrainzDatabase
|
||||
|
||||
def test_title_cleaning():
    """Print the title variations the database layer generates for sample titles.

    Every title is passed to ``_generate_title_variations`` and the resulting
    list is printed with 1-based numbering. Exceptions are reported per title.
    """
    database = MusicBrainzDatabase()

    # Titles taken from previously failed songs
    sample_titles = (
        "Do I Wanna Know? - Live At the BBC",
        "All The Small Things (John Lewis Christmas Ad 2022)",
        "I Don t F k With You",
        "Por Mujeres Como Tu",
        "Thought You Should Know (Without Backing Vocals)",
        "It Might Be You (from the movie Tootsie)",
        "Speedy Gonzales (Boone & Speedy Vocals)",
        "I'm Telling You Now (Two Semitones Down)",
        "The ELO Medley 1",
        "Can't Fight This Feeling (Minus Piano)",
        "The Look Of Love",
        "Revolution (Without Backing Vocals)",
        "Right Here, Right Now (My Heart Belongs to You)",
        "Hush Hush",
        "On The Floor",
        "(I've Had) The Time Of My Life",
    )

    print("🔍 Testing Improved Title Cleaning")
    print("=" * 50)

    for original_title in sample_titles:
        print(f"\n📝 Original: '{original_title}'")

        try:
            candidates = database._generate_title_variations(original_title)
            print(f" 🧹 Cleaned variations ({len(candidates)}):")
            number = 0
            for candidate in candidates:
                number += 1
                print(f" {number}. '{candidate}'")
        except Exception as e:
            print(f" ❌ Error: {e}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_title_cleaning()
|
||||
234
src/utils/artist_lookup.py
Normal file
234
src/utils/artist_lookup.py
Normal file
@ -0,0 +1,234 @@
|
||||
"""
|
||||
Artist lookup table utilities for MusicBrainz Data Cleaner.
|
||||
Provides fallback artist matching when database search fails.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Dict, Optional, List, Any
|
||||
from fuzzywuzzy import fuzz
|
||||
|
||||
|
||||
class ArtistLookupTable:
    """Fallback artist matcher backed by a JSON file of known name variations."""

    def __init__(self, lookup_file: str = "data/artist_lookup.json"):
        """
        Remember the lookup file location and load its content.

        Args:
            lookup_file: Path to the JSON lookup file
        """
        self.lookup_file = Path(lookup_file)
        self.lookup_data = self._load_lookup_data()

    def _load_lookup_data(self) -> Dict[str, Any]:
        """Read the lookup JSON; fall back to an empty table on any problem."""
        try:
            if not self.lookup_file.exists():
                print(f"⚠️ Artist lookup file not found: {self.lookup_file}")
                return {"artist_variations": {}, "metadata": {}}

            with open(self.lookup_file, 'r', encoding='utf-8') as handle:
                loaded = json.load(handle)
                print(f"📚 Loaded artist lookup table with {len(loaded.get('artist_variations', {}))} artists")
                return loaded
        except Exception as e:
            print(f"❌ Error loading artist lookup table: {e}")
            return {"artist_variations": {}, "metadata": {}}

    def _scored_names(self, needle_lower: str):
        """
        Lazily yield ``(canonical_name, artist_info, variation, score)`` tuples.

        For each artist the canonical name is scored first (``variation`` is
        None), followed by every listed variation. Scores are
        ``fuzz.ratio`` results scaled to the 0.0-1.0 range.
        """
        for canonical_name, artist_info in self.lookup_data.get('artist_variations', {}).items():
            yield (canonical_name, artist_info, None,
                   fuzz.ratio(needle_lower, canonical_name.lower()) / 100.0)
            for variation in artist_info.get('variations', []):
                yield (canonical_name, artist_info, variation,
                       fuzz.ratio(needle_lower, variation.lower()) / 100.0)

    def find_artist_by_variation(self, artist_name: str, min_score: float = 0.8) -> Optional[Dict[str, Any]]:
        """
        Return the single best fuzzy match for an artist name.

        Args:
            artist_name: The artist name to search for
            min_score: Minimum fuzzy match score (0.0 to 1.0)

        Returns:
            Dict with canonical_name, mbid, score, notes and match_type (plus
            matched_variation for variation hits), or None if nothing reaches
            min_score. Earlier entries win ties.
        """
        if not artist_name or not self.lookup_data.get('artist_variations'):
            return None

        needle = artist_name.strip().lower()
        winner = None
        top_score = 0

        for canonical_name, artist_info, variation, score in self._scored_names(needle):
            # Only a strictly better score that also clears the threshold replaces the winner
            if score <= top_score or score < min_score:
                continue

            top_score = score
            winner = {
                'canonical_name': canonical_name,
                'mbid': artist_info['mbid'],
                'score': score,
                'notes': artist_info.get('notes', ''),
                'match_type': 'canonical_name' if variation is None else 'variation',
            }
            if variation is not None:
                winner['matched_variation'] = variation

        return winner

    def add_artist_variation(self, canonical_name: str, mbid: str, variations: List[str], notes: str = "") -> bool:
        """
        Insert or overwrite an artist entry and persist the table.

        Args:
            canonical_name: The canonical artist name
            mbid: The MusicBrainz ID
            variations: List of name variations
            notes: Optional notes about the artist

        Returns:
            True if the entry was stored and the file saved, False otherwise
        """
        try:
            table = self.lookup_data
            if not table.get('artist_variations'):
                table['artist_variations'] = {}

            entries = table['artist_variations']
            entries[canonical_name] = dict(mbid=mbid, variations=variations, notes=notes)

            # Keep the artist count in the metadata section in sync
            metadata = table.setdefault('metadata', {})
            metadata['total_artists'] = len(entries)

            return self._save_lookup_data()
        except Exception as e:
            print(f"❌ Error adding artist variation: {e}")
            return False

    def _save_lookup_data(self) -> bool:
        """Write the in-memory table to the JSON file, creating parent directories."""
        try:
            target = self.lookup_file
            target.parent.mkdir(parents=True, exist_ok=True)

            with open(target, 'w', encoding='utf-8') as handle:
                json.dump(self.lookup_data, handle, indent=2, ensure_ascii=False)

            print(f"💾 Saved artist lookup table to {self.lookup_file}")
            return True
        except Exception as e:
            print(f"❌ Error saving artist lookup table: {e}")
            return False

    def get_statistics(self) -> Dict[str, Any]:
        """Return artist and name counts (canonical names included) plus stored metadata."""
        if not self.lookup_data.get('artist_variations'):
            return {'total_artists': 0, 'total_variations': 0}

        entries = self.lookup_data['artist_variations']
        name_count = 0
        for artist_info in entries.values():
            # one for the canonical name itself, plus each listed variation
            name_count += 1 + len(artist_info.get('variations', []))

        return {
            'total_artists': len(entries),
            'total_variations': name_count,
            'metadata': self.lookup_data.get('metadata', {})
        }

    def list_artists(self) -> List[str]:
        """Return every canonical artist name in the lookup table."""
        return [name for name in self.lookup_data.get('artist_variations', {})]

    def search_artists(self, query: str, min_score: float = 0.6) -> List[Dict[str, Any]]:
        """
        Return every canonical name or variation that fuzzily matches a query.

        Args:
            query: Search query
            min_score: Minimum fuzzy match score

        Returns:
            List of match dicts sorted by score, highest first
        """
        hits = []

        for canonical_name, artist_info, variation, score in self._scored_names(query.lower()):
            if score < min_score:
                continue

            hit = {
                'canonical_name': canonical_name,
                'mbid': artist_info['mbid'],
                'score': score,
            }
            if variation is None:
                hit['match_type'] = 'canonical_name'
            else:
                hit['match_type'] = 'variation'
                hit['matched_variation'] = variation
            hit['notes'] = artist_info.get('notes', '')
            hits.append(hit)

        # Stable sort: equal scores keep their discovery order
        return sorted(hits, key=lambda hit: hit['score'], reverse=True)
|
||||
|
||||
|
||||
# Global instance for easy access
|
||||
artist_lookup = ArtistLookupTable()
|
||||
|
||||
|
||||
def find_artist_fallback(artist_name: str, min_score: float = 0.8) -> Optional[Dict[str, Any]]:
    """
    Look an artist up in the module-level ``artist_lookup`` table.

    Thin wrapper around ``artist_lookup.find_artist_by_variation``.

    Args:
        artist_name: The artist name to search for
        min_score: Minimum fuzzy match score

    Returns:
        Artist info dict or None if not found
    """
    best_match = artist_lookup.find_artist_by_variation(artist_name, min_score)
    return best_match
|
||||
|
||||
|
||||
def add_artist_fallback(canonical_name: str, mbid: str, variations: List[str], notes: str = "") -> bool:
    """
    Store an artist entry in the module-level ``artist_lookup`` table.

    Thin wrapper around ``artist_lookup.add_artist_variation``.

    Args:
        canonical_name: The canonical artist name
        mbid: The MusicBrainz ID
        variations: List of name variations
        notes: Optional notes about the artist

    Returns:
        True if successfully added, False otherwise
    """
    was_added = artist_lookup.add_artist_variation(canonical_name, mbid, variations, notes)
    return was_added
|
||||
211
src/utils/artist_title_processing.py
Normal file
211
src/utils/artist_title_processing.py
Normal file
@ -0,0 +1,211 @@
|
||||
import re
|
||||
import json
|
||||
import os
|
||||
from typing import List, Tuple
|
||||
|
||||
# Standalone "ft", "ft.", "feat", "feat." or "featuring" (any case). The
# look-behind and word boundary keep the marker from matching inside ordinary
# words such as "Daft", "Swift" or "Defeater".
_FEATURING_MARKER = re.compile(r'(?<!\w)(?:featuring|feat|ft)\b\.?', re.IGNORECASE)

# Used when data/known_artists.json cannot be read.
_DEFAULT_KNOWN_BAND_NAMES = frozenset({
    "gerry and the pacemakers", "simon & garfunkel", "hall & oates",
    "brooks & dunn", "the everly brothers"
})


def _load_known_band_names() -> set:
    """Return lower-cased group names from data/known_artists.json, or the built-in defaults."""
    json_path = os.path.join(os.path.dirname(__file__), '..', '..', 'data', 'known_artists.json')
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            known_data = json.load(f)
        return {name.lower() for name in known_data.get('groups', [])}
    except (OSError, ValueError, KeyError, AttributeError):
        # Missing/unreadable file, invalid JSON or an unexpected JSON shape
        return set(_DEFAULT_KNOWN_BAND_NAMES)


def parse_complex_collaboration(artist_string: str) -> Tuple[str, List[str]]:
    """
    Parse complex collaboration strings like "Pitbull ft. Ne-Yo, Afrojack & Nayer"

    Detection order:
      1. Malformed strings that start with "ft " / "feat " and contain a
         comma: the last comma-separated part is taken as the main artist.
      2. A standalone featuring marker (ft, ft., feat, feat., featuring):
         text before the first marker is the main artist, text after it is
         split into collaborators.
      3. " & " and " and " separators, unless the whole string is a known
         band name or (for "and") the right-hand side contains "the ".
      4. Comma-separated lists: first part is the main artist.

    Args:
        artist_string: Raw artist credit

    Returns: (main_artist, [collaborators]); collaborators is empty when the
        string is not recognised as a collaboration.
    """
    if not artist_string:
        return (artist_string, [])

    # Special case: Handle malformed artist names like "ft Jamie Foxx West, Kanye"
    if artist_string.lower().startswith(('ft ', 'feat ')):
        remaining = artist_string[artist_string.find(' ') + 1:].strip()
        if ',' in remaining:
            parts = [part.strip() for part in remaining.split(',')]
            # Assume the last part is the main artist
            return (parts[-1], parts[:-1])

    # Primary collaboration indicators, matched as whole tokens only
    marker = _FEATURING_MARKER.search(artist_string)
    if marker:
        main_artist = artist_string[:marker.start()].strip()
        collaborators = parse_collaborators(artist_string[marker.end():].strip())
        return (main_artist, collaborators)

    # Secondary collaboration indicators (need more careful handling)
    known_band_names = None  # loaded at most once, and only if needed
    for pattern in ('&', 'and', ','):
        if pattern == ',':
            # Handle comma-separated artists (e.g., "Ariana Grande, Normani, Nicki Minaj")
            if ',' in artist_string:
                parts = [part.strip() for part in artist_string.split(',')]
                # Drop empty pieces produced by trailing or doubled commas
                return (parts[0], [part for part in parts[1:] if part])
            continue

        pattern_regex = r'\s' + re.escape(pattern) + r'\s'
        parts = re.split(pattern_regex, artist_string, flags=re.IGNORECASE)
        if len(parts) != 2:
            # Separator absent, or present more than once: try the next one
            continue

        part1 = parts[0].strip()
        part2 = parts[1].strip()

        # A comma on either side already marks this as a collaboration;
        # otherwise rule out band names that merely contain the separator.
        if ',' not in part1 and ',' not in part2:
            if known_band_names is None:
                known_band_names = _load_known_band_names()
            if artist_string.lower() in known_band_names:
                continue
            if pattern == 'and' and 'the ' in part2.lower():
                # e.g. "Gerry and the Pacemakers"
                continue

        return (part1, parse_collaborators(part2))

    return (artist_string, [])


def parse_collaborators(collaborators_string: str) -> List[str]:
    """
    Split a collaborator list on commas, ampersands and the word "and".

    Args:
        collaborators_string: Text following the collaboration marker

    Returns:
        Stripped, non-empty collaborator names in their original order
    """
    if not collaborators_string:
        return []

    separators = r'[,&]|\b(?:and)\b'
    parts = re.split(separators, collaborators_string, flags=re.IGNORECASE)
    collaborators = [part.strip() for part in parts if part.strip()]
    return collaborators
|
||||
|
||||
# Used when the contraction mapping cannot be loaded from the data file.
_FALLBACK_CONTRACTION_FIXES = {
    "dont": "don't", "don t": "don't", "cant": "can't", "can t": "can't",
    "wont": "won't", "won t": "won't", "im": "I'm", "i m": "I'm",
    "ive": "I've", "i ve": "I've", "id": "I'd", "i d": "I'd",
    "ill": "I'll", "i ll": "I'll", "isnt": "isn't", "isn t": "isn't",
    "arent": "aren't", "aren t": "aren't", "wasnt": "wasn't", "wasn t": "wasn't",
    "werent": "weren't", "weren t": "weren't", "hasnt": "hasn't", "hasn t": "hasn't",
    "havent": "haven't", "haven t": "haven't", "shouldnt": "shouldn't", "shouldn t": "shouldn't",
    "couldnt": "couldn't", "couldn t": "couldn't", "wouldnt": "wouldn't", "wouldn t": "wouldn't",
    "didnt": "didn't", "didn t": "didn't", "theyre": "they're", "they re": "they're",
    "youre": "you're", "you re": "you're", "whos": "who's", "who s": "who's",
    "whats": "what's", "what s": "what's", "thats": "that's", "that s": "that's",
    "lets": "let's", "let s": "let's", "theres": "there's", "there s": "there's",
    "heres": "here's", "here s": "here's", "hows": "how's", "how s": "how's",
    "shes": "she's", "she s": "she's", "hes": "he's", "he s": "he's",
    "were": "we're", "we re": "we're", "weve": "we've", "we ve": "we've",
    "well": "we'll", "we ll": "we'll", "its": "it's", "it s": "it's",
    "itll": "it'll", "it ll": "it'll", "yall": "y'all", "y all": "y'all",
    "wouldve": "would've", "would ve": "would've", "couldve": "could've", "could ve": "could've",
    "shouldve": "should've", "should ve": "should've", "mightve": "might've", "might ve": "might've",
    "mustve": "must've", "must ve": "must've", "maam": "ma'am", "ma am": "ma'am",
    "oclock": "o'clock", "o clock": "o'clock", "aint": "ain't", "ain t": "ain't",
    "rocknroll": "rock 'n' roll", "rock n roll": "rock 'n' roll"
}

# Parenthesised suffixes that are stripped from titles. Matching is
# case-insensitive, so each pattern is listed once.
_SPECIFIC_SUFFIX_PATTERNS = [
    r'\s*\(Karaoke Version\)', r'\s*\(Karaoke\)', r'\s*\(Instrumental\)', r'\s*\(Backing Track\)',
    r'\s*\(live [^)]*\)', r'\s*\(Acoustic\)', r'\s*\(Without Backing Vocals\)',
    r'\s*\(Clean\)', r'\s*\(Remix\)',
    r'\s*\(Radio Edit\)', r'\s*\(Extended Mix\)', r'\s*\(Single Version\)', r'\s*\(Album Version\)',
    r'\s*\(Original Mix\)', r'\s*\(John Lewis Christmas Ad \d+\)',
    r'\s*\(from the movie [^)]*\)', r'\s*\(from the [^)]*\)', r'\s*\(feat\. [^)]*\)',
    r'\s*\(featuring [^)]*\)', r'\s*\(ft\. [^)]*\)', r'\s*\(duet\)',
    r'\s*\(Two Semitones Down\)', r'\s*\(Minus Piano\)', r'\s*\(Cut Down\)',
    r'\s*\(Boone & Speedy Vocals\)', r'\s*\(My Heart Belongs to You\)'
]

# Subset that is additionally applied to the contraction-fixed title.
_FIXED_TITLE_SUFFIX_PATTERNS = _SPECIFIC_SUFFIX_PATTERNS[:9]


def generate_title_variations(title: str) -> List[str]:
    """
    Build the list of title spellings to try when searching for a recording.

    The stripped original title always comes first. Further entries are
    added, without duplicates, for: curly apostrophes normalised to straight
    ones, repaired contractions ("don t" -> "don't"), a few hardcoded
    spelling fixes, and the title with known parenthesised suffixes such as
    "(Karaoke Version)" removed. If none of these produced a variation, all
    parenthesised groups are removed as a last resort.

    Args:
        title: Raw song title

    Returns:
        Candidate titles in the order they should be tried
    """
    original = title.strip()
    search_titles = [original]

    # Normalize apostrophes (curly to straight). Written as escapes so the
    # characters cannot be silently replaced by an editor or encoding step.
    title_fixes = original.replace('\u2019', "'").replace('\u2018', "'")
    if title_fixes != original:
        search_titles.append(title_fixes)

    # Load contraction fixes from JSON file
    try:
        from ..utils.data_loader import data_loader
        contraction_fixes = data_loader.load_contraction_fixes()
    except Exception:
        contraction_fixes = {}
    if not contraction_fixes:
        # The import or the load failed, or the data file was missing/empty
        contraction_fixes = _FALLBACK_CONTRACTION_FIXES

    # Apply contraction fixes; each fix builds on the previous ones
    for broken_contraction, fixed_contraction in contraction_fixes.items():
        # Use word boundaries to avoid partial matches
        pattern = r'\b' + re.escape(broken_contraction) + r'\b'
        fixed_title = re.sub(pattern, fixed_contraction, title_fixes, flags=re.IGNORECASE)
        if fixed_title != title_fixes:
            title_fixes = fixed_title
            if title_fixes not in search_titles:
                search_titles.append(title_fixes)

    # Additional hardcoded fixes for edge cases
    additional_fixes = [
        (r'\bPhunk\b', "Funk"), (r'\bBout\b', "About")
    ]
    for pattern, replacement in additional_fixes:
        fixed_title = re.sub(pattern, replacement, title_fixes, flags=re.IGNORECASE)
        if fixed_title != title_fixes:
            title_fixes = fixed_title
            if title_fixes not in search_titles:
                search_titles.append(title_fixes)

    # Remove specific patterns from the original title first
    for pattern in _SPECIFIC_SUFFIX_PATTERNS:
        specific_clean = re.sub(pattern, '', original, flags=re.IGNORECASE).strip()
        if specific_clean != original and specific_clean and specific_clean not in search_titles:
            search_titles.append(specific_clean)

    # Only remove general parentheses if nothing above produced a variation
    if len(search_titles) == 1:  # Only the original title
        clean_title = re.sub(r'\s*\([^)]*\)', '', original).strip()
        if clean_title != original and clean_title:
            search_titles.append(clean_title)

    # Finally strip the most common suffixes from the fixed-up title as well
    for pattern in _FIXED_TITLE_SUFFIX_PATTERNS:
        clean_title = re.sub(pattern, '', title_fixes, flags=re.IGNORECASE).strip()
        if clean_title and clean_title not in search_titles:
            search_titles.append(clean_title)

    return search_titles
|
||||
@ -65,6 +65,16 @@ class DataLoader:
|
||||
|
||||
return self._known_recordings
|
||||
|
||||
def load_contraction_fixes(self) -> Dict[str, str]:
    """Load contraction fixes from JSON file.

    Reads ``contraction_fixes.json`` from ``self.data_dir``.

    Returns:
        The parsed JSON object. An empty dict is returned (after printing a
        warning) when the file is missing or unreadable, is not valid
        UTF-8/JSON, or does not contain a JSON object at the top level.
    """
    contractions_file = self.data_dir / "contraction_fixes.json"
    try:
        with open(contractions_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # OSError covers FileNotFoundError plus permission/is-a-directory
        # failures; ValueError covers json.JSONDecodeError and
        # UnicodeDecodeError. All of them mean "no usable data".
        print(f"Warning: Could not load contraction fixes data: {e}")
        return {}
    if not isinstance(data, dict):
        # Honour the declared return type even if the file holds e.g. a list.
        print(
            "Warning: Could not load contraction fixes data: "
            f"expected a JSON object, got {type(data).__name__}"
        )
        return {}
    return data
|
||||
|
||||
def reload_data(self) -> None:
|
||||
"""Reload data from files (useful for testing or updates)."""
|
||||
self._known_artists = None
|
||||
|
||||
157
start_services.sh
Executable file
157
start_services.sh
Executable file
@ -0,0 +1,157 @@
|
||||
#!/bin/bash
#
# MusicBrainz Cleaner - Quick Start Script
# This script automates the startup of MusicBrainz services.

# Strict mode: stop on the first failing command (-e), on any use of an
# unset variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

echo "🚀 Starting MusicBrainz services..."

# Colors for output. The values are backslash escape sequences that are
# expanded at print time by the print_* helpers; they never change, so they
# are marked read-only.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color
|
||||
|
||||
# Function to print colored output
|
||||
# Print an informational message on stdout, prefixed with a blue "[INFO]" tag.
# Globals:   BLUE, NC (read) - colour escape sequences
# Arguments: $1 - message text, printed verbatim
print_status() {
  # %b expands the escapes stored in the colour variables; %s keeps the
  # message literal (echo -e would also interpret backslashes inside it).
  printf '%b[INFO]%b %s\n' "${BLUE-}" "${NC-}" "${1-}"
}
|
||||
|
||||
# Print a success message on stdout, prefixed with a green "[SUCCESS]" tag.
# Globals:   GREEN, NC (read) - colour escape sequences
# Arguments: $1 - message text, printed verbatim
print_success() {
  # %b expands the escapes stored in the colour variables; %s keeps the
  # message literal (echo -e would also interpret backslashes inside it).
  printf '%b[SUCCESS]%b %s\n' "${GREEN-}" "${NC-}" "${1-}"
}
|
||||
|
||||
# Print a warning, prefixed with a yellow "[WARNING]" tag.
# Globals:   YELLOW, NC (read) - colour escape sequences
# Arguments: $1 - message text, printed verbatim
# Outputs:   writes to stderr, so diagnostics stay out of captured stdout
print_warning() {
  # %b expands the escapes stored in the colour variables; %s keeps the
  # message literal (echo -e would also interpret backslashes inside it).
  printf '%b[WARNING]%b %s\n' "${YELLOW-}" "${NC-}" "${1-}" >&2
}
|
||||
|
||||
# Print an error message, prefixed with a red "[ERROR]" tag.
# Globals:   RED, NC (read) - colour escape sequences
# Arguments: $1 - message text, printed verbatim
# Outputs:   writes to stderr, so errors stay out of captured stdout
print_error() {
  # %b expands the escapes stored in the colour variables; %s keeps the
  # message literal (echo -e would also interpret backslashes inside it).
  printf '%b[ERROR]%b %s\n' "${RED-}" "${NC-}" "${1-}" >&2
}
|
||||
|
||||
# --- Preflight checks ---------------------------------------------------
# Each check prints an error and exits with status 1 on failure.

# Check if Docker is running (the daemon must answer `docker info`).
if ! docker info > /dev/null 2>&1; then
  print_error "Docker is not running. Please start Docker Desktop first."
  exit 1
fi

print_success "Docker is running"

# Every later step shells out to docker-compose; fail early with a clear
# message instead of a bare "command not found" halfway through startup.
if ! command -v docker-compose > /dev/null 2>&1; then
  print_error "docker-compose was not found on PATH. Please install it and try again."
  exit 1
fi

# Check if we're in the right directory (the one holding docker-compose.yml).
if [[ ! -f "docker-compose.yml" ]]; then
  print_error "This script must be run from the musicbrainz-cleaner directory"
  exit 1
fi

# Check if the sibling musicbrainz-docker directory exists.
if [[ ! -d "../musicbrainz-docker" ]]; then
  print_error "musicbrainz-docker directory not found. Please ensure you're in the musicbrainz-server directory."
  exit 1
fi

# Navigate to musicbrainz-docker; the docker-compose commands that follow
# operate on that project.
cd ../musicbrainz-docker
|
||||
|
||||
print_status "Checking for port conflicts..."

# Report whether something is using the given port.
# Arguments: $1 - port number
# Returns:   0 when lsof lists an entry for the port, non-zero otherwise
port_in_use() {
  lsof -i ":$1" > /dev/null 2>&1
}

# Prefer port 5000 and fall back to 5001. The fallback is verified as well,
# so the services are never started on a port that is already taken.
PORT=5000
if port_in_use "$PORT"; then
  PORT=5001
  if port_in_use "$PORT"; then
    print_error "Ports 5000 and 5001 are both in use. Please free one of them and try again."
    exit 1
  fi
  print_warning "Port 5000 is in use. Using port 5001 instead."
else
  print_success "Port 5000 is available"
fi
|
||||
|
||||
# Bring down anything left over from an earlier run. Output is discarded and
# a failure is ignored: there may simply be nothing to stop.
print_status "Stopping existing containers..."
docker-compose down &> /dev/null || :

# Force-remove the database container by name in case it still exists;
# a failure here is ignored as well.
print_status "Cleaning up conflicting containers..."
docker rm -f musicbrainz-docker-db-1 &> /dev/null || :

# Launch the stack in the background, passing the chosen web port only to
# this one docker-compose invocation.
print_status "Starting MusicBrainz services on port $PORT..."
MUSICBRAINZ_WEB_SERVER_PORT="${PORT}" docker-compose up -d

print_success "Services started successfully!"
|
||||
|
||||
# Poll the database container until pg_isready succeeds, giving up after
# max_attempts polls spaced 10 seconds apart.
print_status "Waiting for database to initialize (this may take 5-10 minutes)..."
print_status "You can monitor progress with: docker-compose logs -f db"

max_attempts=60
attempts=0
until docker-compose exec -T db pg_isready -U musicbrainz > /dev/null 2>&1; do
  attempts=$((attempts + 1))
  print_status "Waiting for database... (attempt $attempts/$max_attempts)"
  sleep 10
  # Out of attempts: stop polling without another readiness probe.
  if (( attempts >= max_attempts )); then
    break
  fi
done

# attempts only reaches max_attempts when every probe failed.
if (( attempts < max_attempts )); then
  print_success "Database is ready!"
else
  print_warning "Database may still be initializing. You can check status with: docker-compose logs db"
fi
|
||||
|
||||
# Create .env file in the directory the script was started from.
# OLDPWD is the directory we were in before `cd ../musicbrainz-docker`, i.e.
# the cleaner checkout that was validated at startup. Using it instead of a
# hard-coded "../musicbrainz-cleaner" keeps the script working when the
# checkout directory has a different name.
cd -- "$OLDPWD"

print_status "Creating environment configuration..."

# NOTE(review): DB_HOST below is a fixed container IP address; confirm it
# matches the address Docker actually assigns to the db container.
# The here-document delimiter is unquoted on purpose so $PORT is expanded.
cat > .env << EOF
# Database connection (default Docker setup)
DB_HOST=172.18.0.2
DB_PORT=5432
DB_NAME=musicbrainz_db
DB_USER=musicbrainz
DB_PASSWORD=musicbrainz

# MusicBrainz web server
MUSICBRAINZ_WEB_SERVER_PORT=$PORT
EOF

print_success "Environment configuration created"
|
||||
|
||||
# Smoke-test the database connection from inside the cleaner container.
# The inline Python program exits non-zero when constructing
# MusicBrainzDatabase raises; stderr of the docker-compose run is discarded.
print_status "Testing connection..."

probe_status=0
docker-compose run --rm musicbrainz-cleaner python3 -c "
import sys
sys.path.insert(0, '/app')
from src.api.database import MusicBrainzDatabase
try:
    db = MusicBrainzDatabase()
    print('✅ Database connection successful')
except Exception as e:
    print(f'❌ Database connection failed: {e}')
    sys.exit(1)
" 2>/dev/null || probe_status=$?

# A failed probe is only a warning: the script continues either way.
if [[ "$probe_status" -ne 0 ]]; then
  print_warning "Connection test failed. Services may still be initializing."
else
  print_success "Connection test passed!"
fi
|
||||
|
||||
# Final summary: where the services are reachable, suggested next steps and
# handy docker-compose commands.
echo ""
print_success "MusicBrainz services are now running!"

# The here-document delimiter is unquoted so that $PORT is expanded.
cat << EOF

📊 Service Status:
 - Web Server: http://localhost:$PORT
 - Database: PostgreSQL (internal)
 - Search: Solr (internal)

🧪 Next steps:
 1. Run quick test: python3 quick_test_20.py
 2. Run larger test: python3 bulk_test_1000.py
 3. Use cleaner: python3 -m src.cli.main --input your_file.json --output cleaned.json

📋 Useful commands:
 - View logs: cd ../musicbrainz-docker && docker-compose logs -f
 - Stop services: cd ../musicbrainz-docker && docker-compose down
 - Check status: cd ../musicbrainz-docker && docker-compose ps

EOF
|
||||
Loading…
Reference in New Issue
Block a user