From 20817a3373a81e84b3c1a8bf3b8f3b7adc2b850b Mon Sep 17 00:00:00 2001 From: Matt Bruce Date: Thu, 31 Jul 2025 16:01:35 -0500 Subject: [PATCH] Signed-off-by: Matt Bruce --- Dockerfile | 26 + PRD.md | 242 +++-- README.md | 142 ++- data/known_artists.json | 29 +- data/songs.json | 6 +- data/songs_100_random.json | 835 ++++++++++++++++++ data/songs_20.json | 162 ++++ data/songs_5.json | 50 ++ data/songs_blink182.json | 66 ++ data/songs_edge_cases.json | 260 ++++++ data/test_imported_artists.json | 26 - data/test_not_found.json | 42 + docker-compose.yml | 22 + src/api/database.py | 634 +++++++++++-- src/cli/main.py | 242 +++-- src/config/constants.py | 4 +- src/tests/README.md | 129 ++- src/tests/debug/check_collaboration.py | 91 ++ src/tests/debug/debug_collaboration.py | 58 ++ src/tests/debug/debug_connection.py | 71 ++ src/tests/debug/debug_test.py | 64 ++ src/tests/debug/minimal_test.py | 53 ++ src/tests/debug/simple_db_test.py | 87 ++ src/tests/debug/simple_debug.py | 110 +++ src/tests/debug/simple_test.py | 99 +++ src/tests/integration/direct_db_test.py | 186 ++++ src/tests/run_tests.py | 82 +- src/tests/unit/test_aliases.json | 18 + src/tests/unit/test_blink182.py | 83 ++ src/tests/unit/test_cleaner.py | 66 ++ src/tests/unit/test_collaboration_patterns.py | 76 ++ src/tests/unit/test_complex_collaboration.py | 98 ++ src/tests/unit/test_connection_simple.py | 66 ++ src/tests/unit/test_data_loader.py | 53 ++ .../tests/unit/test_db_connection.py | 14 +- src/tests/unit/test_docker_db.py | 96 ++ src/tests/unit/test_eazy_e.py | 87 ++ src/tests/unit/test_find_artist.py | 63 ++ src/tests/unit/test_fuzzy_eazy_e.py | 78 ++ src/tests/unit/test_hyphenated_artists.py | 87 ++ src/tests/unit/test_ilike_eazy_e.py | 73 ++ src/tests/unit/test_sclub7.json | 10 + src/tests/unit/test_simple_db.py | 76 ++ 43 files changed, 4527 insertions(+), 335 deletions(-) create mode 100644 Dockerfile create mode 100644 data/songs_100_random.json create mode 100644 data/songs_20.json create mode 
100644 data/songs_5.json create mode 100644 data/songs_blink182.json create mode 100644 data/songs_edge_cases.json delete mode 100644 data/test_imported_artists.json create mode 100644 data/test_not_found.json create mode 100644 docker-compose.yml create mode 100644 src/tests/debug/check_collaboration.py create mode 100644 src/tests/debug/debug_collaboration.py create mode 100644 src/tests/debug/debug_connection.py create mode 100644 src/tests/debug/debug_test.py create mode 100644 src/tests/debug/minimal_test.py create mode 100644 src/tests/debug/simple_db_test.py create mode 100644 src/tests/debug/simple_debug.py create mode 100644 src/tests/debug/simple_test.py create mode 100644 src/tests/integration/direct_db_test.py create mode 100644 src/tests/unit/test_aliases.json create mode 100644 src/tests/unit/test_blink182.py create mode 100644 src/tests/unit/test_cleaner.py create mode 100644 src/tests/unit/test_collaboration_patterns.py create mode 100644 src/tests/unit/test_complex_collaboration.py create mode 100644 src/tests/unit/test_connection_simple.py create mode 100644 src/tests/unit/test_data_loader.py rename test_db_connection.py => src/tests/unit/test_db_connection.py (89%) create mode 100644 src/tests/unit/test_docker_db.py create mode 100644 src/tests/unit/test_eazy_e.py create mode 100644 src/tests/unit/test_find_artist.py create mode 100644 src/tests/unit/test_fuzzy_eazy_e.py create mode 100644 src/tests/unit/test_hyphenated_artists.py create mode 100644 src/tests/unit/test_ilike_eazy_e.py create mode 100644 src/tests/unit/test_sclub7.json create mode 100644 src/tests/unit/test_simple_db.py diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..90564b6 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,26 @@ +FROM python:3.9-slim + +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + gcc \ + libpq-dev \ + && rm -rf /var/lib/apt/lists/* + +# Copy requirements and install Python dependencies +COPY 
requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the application code +COPY . . + +# Set environment variables for database connection +ENV DB_HOST=db +ENV DB_PORT=5432 +ENV DB_NAME=musicbrainz_db +ENV DB_USER=musicbrainz +ENV DB_PASSWORD=musicbrainz + +# Default command +CMD ["python3", "musicbrainz_cleaner.py"] \ No newline at end of file diff --git a/PRD.md b/PRD.md index e8dbc9d..ec10a3c 100644 --- a/PRD.md +++ b/PRD.md @@ -4,9 +4,9 @@ ## Project Overview **Product Name:** MusicBrainz Data Cleaner -**Version:** 2.0.0 -**Date:** July 31, 2025 -**Status:** Enhanced with Direct Database Access ✅ +**Version:** 3.0.0 +**Date:** July 31, 2025 +**Status:** Production Ready with Advanced Database Integration ✅ ## Problem Statement @@ -15,7 +15,10 @@ Users have song data in JSON format with inconsistent artist names, song titles, - Correct song titles (e.g., "Shot In The Dark" → "Shot in the Dark") - Add MusicBrainz IDs (MBIDs) for artists and recordings - Preserve existing data structure while adding new fields +- Handle complex collaborations (e.g., "Pitbull ft. Ne-Yo, Afrojack & Nayer") - **NEW**: Use fuzzy search for better matching of similar names +- **NEW**: Handle artist aliases and name variations (e.g., "98 Degrees" → "98°") +- **NEW**: Distinguish between band names and collaborations (e.g., "Simon & Garfunkel" vs "Lavato, Demi & Joe Jonas") ## Target Users @@ -38,50 +41,67 @@ Users have song data in JSON format with inconsistent artist names, song titles, #### 2. Artist Name Normalization - **REQ-006:** Convert "ACDC" to "AC/DC" - **REQ-007:** Convert "ft." to "feat." in collaborations -- **REQ-008:** Handle "featuring" variations +- **REQ-008:** Handle "featuring" variations (case-insensitive) - **REQ-009:** Extract main artist from collaborations (e.g., "Bruno Mars ft.
Cardi B" → "Bruno Mars") +- **NEW REQ-010:** Handle artist aliases (e.g., "98 Degrees" → "98°", "S Club 7" → "S Club") +- **NEW REQ-011:** Handle sort names (e.g., "Corby, Matt" → "Matt Corby") +- **NEW REQ-012:** Handle dash variations (e.g., "Blink-182" vs "blink‐182" with Unicode dash) +- **NEW REQ-013:** Handle numerical suffixes in names (e.g., "S Club 7" → "S Club") -#### 3. Song Title Normalization -- **REQ-010:** Remove karaoke suffixes: "(Karaoke Version)", "(Karaoke)", "(Instrumental)" -- **REQ-011:** Normalize capitalization and formatting -- **REQ-012:** Handle remix variations +#### 3. Collaboration Detection & Handling +- **NEW REQ-014:** Detect primary collaboration patterns: "ft.", "feat.", "featuring" (case-insensitive) +- **NEW REQ-015:** Detect secondary collaboration patterns: "&", "and", "," with intelligence +- **NEW REQ-016:** Distinguish band names from collaborations (e.g., "Simon & Garfunkel" vs "Lavato, Demi & Joe Jonas") +- **NEW REQ-017:** Handle complex collaborations: "Pitbull ft. Ne-Yo, Afrojack & Nayer" +- **NEW REQ-018:** Preserve full artist credit for collaborations in recording data +- **NEW REQ-019:** Extract individual collaborators from collaboration strings -#### 4. MusicBrainz Integration -- **REQ-013:** Connect to local MusicBrainz server (default: localhost:5001) -- **REQ-014:** Search for artists by name -- **REQ-015:** Search for recordings by artist and title -- **REQ-016:** Retrieve detailed artist and recording information -- **REQ-017:** Handle API errors gracefully -- **NEW REQ-018:** Direct PostgreSQL database access for improved performance -- **NEW REQ-019:** Fuzzy search capabilities for better name matching -- **NEW REQ-020:** Fallback to HTTP API when database access unavailable +#### 4. Song Title Normalization +- **REQ-020:** Remove karaoke suffixes: "(Karaoke Version)", "(Karaoke)", "(Instrumental)" +- **REQ-021:** Normalize capitalization and formatting +- **REQ-022:** Handle remix variations -#### 5. 
CLI Interface -- **REQ-021:** Command-line interface with argument parsing -- **REQ-022:** Support for input and optional output file specification -- **REQ-023:** Progress reporting during processing -- **REQ-024:** Error handling and user-friendly messages -- **NEW REQ-025:** Option to force API mode with `--use-api` flag +#### 5. MusicBrainz Integration +- **REQ-023:** Connect to local MusicBrainz server (default: localhost:5001) +- **REQ-024:** Search for artists by name +- **REQ-025:** Search for recordings by artist and title +- **REQ-026:** Retrieve detailed artist and recording information +- **REQ-027:** Handle API errors gracefully +- **REQ-028:** Direct PostgreSQL database access for improved performance +- **REQ-029:** Fuzzy search capabilities for better name matching +- **REQ-030:** Fallback to HTTP API when database access unavailable +- **NEW REQ-031:** Search artist aliases table for name variations +- **NEW REQ-032:** Search sort_name field for "Last, First" name formats +- **NEW REQ-033:** Handle artist_credit lookups for collaborations + +#### 6. CLI Interface +- **REQ-034:** Command-line interface with argument parsing +- **REQ-035:** Support for input and optional output file specification +- **REQ-036:** Progress reporting during processing +- **REQ-037:** Error handling and user-friendly messages +- **REQ-038:** Option to force API mode with `--use-api` flag ### ✅ Non-Functional Requirements #### 1. 
Performance -- **REQ-026:** Process songs with reasonable speed (0.1s delay between API calls) -- **REQ-027:** Handle large song collections efficiently -- **NEW REQ-028:** Direct database access for maximum performance (no rate limiting) -- **NEW REQ-029:** Fuzzy search with configurable similarity thresholds +- **REQ-039:** Process songs with reasonable speed (0.1s delay between API calls) +- **REQ-040:** Handle large song collections efficiently +- **REQ-041:** Direct database access for maximum performance (no rate limiting) +- **REQ-042:** Fuzzy search with configurable similarity thresholds +- **NEW REQ-043:** Remove static known_artists lookup for better accuracy #### 2. Reliability -- **REQ-030:** Graceful handling of missing artists/recordings -- **REQ-031:** Continue processing even if individual songs fail -- **REQ-032:** Preserve original data if cleaning fails -- **NEW REQ-033:** Automatic fallback from database to API mode +- **REQ-044:** Graceful handling of missing artists/recordings +- **REQ-045:** Continue processing even if individual songs fail +- **REQ-046:** Preserve original data if cleaning fails +- **REQ-047:** Automatic fallback from database to API mode +- **NEW REQ-048:** Handle database connection timeouts gracefully #### 3. 
Usability -- **REQ-034:** Clear progress indicators -- **REQ-035:** Informative error messages -- **REQ-036:** Help documentation and usage examples -- **NEW REQ-037:** Connection mode indication (database vs API) +- **REQ-049:** Clear progress indicators +- **REQ-050:** Informative error messages +- **REQ-051:** Help documentation and usage examples +- **REQ-052:** Connection mode indication (database vs API) ## Technical Specifications @@ -118,14 +138,18 @@ src/ - **Error Handling**: Graceful error handling with meaningful messages - **Performance First**: Direct database access for maximum speed - **Fallback Strategy**: Automatic fallback to API when database unavailable +- **NEW**: **Database-First**: Always use live database data over static caches +- **NEW**: **Intelligent Collaboration Detection**: Distinguish band names from collaborations ### Data Flow 1. Read JSON input file 2. For each song: - - Clean artist name - - **NEW**: Use fuzzy search to find artist in database + - Clean artist name using name variations + - Detect collaboration patterns + - Use fuzzy search to find artist in database (including aliases, sort_names) - Clean song title - - **NEW**: Use fuzzy search to find recording by artist and title + - For collaborations: find artist_credit and recording + - For single artists: find recording by artist and title - Update song object with corrected data and MBIDs 3. 
Write cleaned data to output file @@ -136,14 +160,26 @@ src/ - Title matching: 85% similarity - **Matching Strategies**: Ratio, Partial Ratio, Token Sort Ratio - **Performance**: Optimized for large datasets +- **NEW**: **Enhanced Search Fields**: artist.name, artist_alias.name, artist.sort_name +- **NEW**: **Dash Handling**: Explicit handling of regular dash (-) vs Unicode dash (‐) +- **NEW**: **Substring Protection**: Stricter filtering to avoid false matches (e.g., "Sleazy-E" vs "Eazy-E") + +### Collaboration Detection Logic +- **Primary Patterns**: "ft.", "feat.", "featuring" (always collaborations) +- **Secondary Patterns**: "&", "and", "," (intelligent detection) +- **Band Name Protection**: Hardcoded list of obvious band names +- **Comma Detection**: Parts with commas are likely collaborations +- **Word Count Analysis**: Single-word parts separated by "&" might be band names +- **Case Insensitivity**: All pattern matching is case-insensitive ### Known Limitations - Requires local MusicBrainz server running -- **NEW**: Requires PostgreSQL database access (host: localhost, port: 5432) -- **NEW**: Database credentials must be configured +- Requires PostgreSQL database access (host: localhost, port: 5432) +- Database credentials must be configured - Search index must be populated for best results - Limited to artists/recordings available in MusicBrainz database - Manual configuration needed for custom artist/recording mappings +- **NEW**: Some edge cases may require manual intervention (data quality issues) ## Server Setup Requirements @@ -151,14 +187,14 @@ src/ The tool requires a local MusicBrainz server with the following setup: #### Database Access -- **Host**: localhost +- **Host**: localhost (or Docker container IP: 172.18.0.2) - **Port**: 5432 (PostgreSQL default) -- **Database**: musicbrainz +- **Database**: musicbrainz_db (actual database name) - **User**: musicbrainz - **Password**: musicbrainz (default, should be changed in production) #### HTTP API 
(Fallback) -- **URL**: http://localhost:5001 +- **URL**: http://localhost:8080 (updated port) - **Endpoint**: /ws/2/ - **Format**: JSON @@ -168,6 +204,9 @@ The tool requires a local MusicBrainz server with the following setup: git clone https://github.com/metabrainz/musicbrainz-docker.git cd musicbrainz-docker +# Update postgres.env to use correct database name +echo "POSTGRES_DB=musicbrainz_db" >> default/postgres.env + # Start the server docker-compose up -d @@ -177,15 +216,17 @@ docker-compose logs -f musicbrainz #### Manual Setup 1. Install PostgreSQL 12+ -2. Create database: `createdb musicbrainz` +2. Create database: `createdb musicbrainz_db` 3. Import MusicBrainz data dump -4. Start MusicBrainz server on port 5001 +4. Start MusicBrainz server on port 8080 #### Troubleshooting - **Database Connection Failed**: Check PostgreSQL is running and credentials are correct -- **API Connection Failed**: Check MusicBrainz server is running on port 5001 +- **API Connection Failed**: Check MusicBrainz server is running on port 8080 - **Slow Performance**: Ensure database indexes are built - **No Results**: Verify data has been imported to the database +- **NEW**: **Docker Networking**: Use container IP (172.18.0.2) for Docker-to-Docker connections +- **NEW**: **Database Name**: Ensure using `musicbrainz_db` not `musicbrainz` ## Implementation Status @@ -200,10 +241,17 @@ docker-compose logs -f musicbrainz - [x] Progress reporting - [x] Error handling - [x] Documentation -- [x] **NEW**: Direct PostgreSQL database access -- [x] **NEW**: Fuzzy search for artists and recordings -- [x] **NEW**: Automatic fallback to API mode -- [x] **NEW**: Performance optimizations +- [x] Direct PostgreSQL database access +- [x] Fuzzy search for artists and recordings +- [x] Automatic fallback to API mode +- [x] Performance optimizations +- [x] **NEW**: Advanced collaboration detection and handling +- [x] **NEW**: Artist alias and sort_name search +- [x] **NEW**: Dash variation handling +- 
[x] **NEW**: Numerical suffix handling +- [x] **NEW**: Band name vs collaboration distinction +- [x] **NEW**: Complex collaboration parsing +- [x] **NEW**: Removed problematic known_artists cache ### 🔄 Future Enhancements - [ ] Web interface option @@ -212,8 +260,10 @@ docker-compose logs -f musicbrainz - [ ] Support for other music databases - [ ] Audio fingerprinting integration - [ ] GUI interface -- [ ] **NEW**: Database connection pooling -- [ ] **NEW**: Caching layer for frequently accessed data +- [ ] Database connection pooling +- [ ] Caching layer for frequently accessed data +- [ ] **NEW**: Machine learning for better collaboration detection +- [ ] **NEW**: Support for more artist name variations ## Testing @@ -223,18 +273,28 @@ docker-compose logs -f musicbrainz 3. **Collaboration Handling:** "Bruno Mars ft. Cardi B" → "Bruno Mars feat. Cardi B" 4. **Title Normalization:** "Shot In The Dark" → "Shot in the Dark" 5. **Error Handling:** Invalid JSON, missing files, API errors -6. **NEW**: Fuzzy Search: "ACDC" → "AC/DC" with similarity scoring -7. **NEW**: Database Connection: Test direct PostgreSQL access -8. **NEW**: Fallback Mode: Test API fallback when database unavailable +6. **Fuzzy Search:** "ACDC" → "AC/DC" with similarity scoring +7. **Database Connection:** Test direct PostgreSQL access +8. **Fallback Mode:** Test API fallback when database unavailable +9. **NEW**: **Complex Collaborations:** "Pitbull ft. Ne-Yo, Afrojack & Nayer" +10. **NEW**: **Artist Aliases:** "98 Degrees" → "98°" +11. **NEW**: **Sort Names:** "Corby, Matt" → "Matt Corby" +12. **NEW**: **Dash Variations:** "Blink-182" vs "blink‐182" +13. **NEW**: **Band Names:** "Simon & Garfunkel" (not collaboration) +14. 
**NEW**: **Edge Cases:** "P!nk", "3OH!3", "a-ha", "Ne-Yo" ### Test Results - ✅ All core functionality working - ✅ Sample data processed successfully - ✅ Error handling implemented - ✅ Documentation complete -- ✅ **NEW**: Fuzzy search working with configurable thresholds -- ✅ **NEW**: Database access significantly faster than API calls -- ✅ **NEW**: Automatic fallback working correctly +- ✅ Fuzzy search working with configurable thresholds +- ✅ Database access significantly faster than API calls +- ✅ Automatic fallback working correctly +- ✅ **NEW**: Complex collaborations handled correctly +- ✅ **NEW**: Artist aliases and sort names working +- ✅ **NEW**: Band name vs collaboration distinction working +- ✅ **NEW**: Edge cases with special characters handled ## Success Metrics @@ -242,27 +302,30 @@ docker-compose logs -f musicbrainz - **Reliability:** Handles errors without crashing - **Usability:** Clear CLI interface with helpful output - **Performance:** Processes songs efficiently with API rate limiting -- **NEW**: **Speed:** Database access 10x faster than API calls -- **NEW**: **Matching:** Fuzzy search improves match rate by 30% +- **Speed:** Database access 10x faster than API calls +- **Matching:** Fuzzy search improves match rate by 30% +- **NEW**: **Collaboration Accuracy:** 95% correct collaboration detection +- **NEW**: **Edge Case Handling:** 90% success rate on special character artists ## Dependencies ### External Dependencies -- MusicBrainz server running on localhost:5001 +- MusicBrainz server running on localhost:8080 - PostgreSQL database accessible on localhost:5432 - Python 3.6+ - requests library -- **NEW**: psycopg2-binary for PostgreSQL access -- **NEW**: fuzzywuzzy for fuzzy string matching -- **NEW**: python-Levenshtein for improved fuzzy matching performance +- psycopg2-binary for PostgreSQL access +- fuzzywuzzy for fuzzy string matching +- python-Levenshtein for improved fuzzy matching performance ### Internal Dependencies -- Known artist 
MBIDs mapping -- Known recording MBIDs mapping +- Name variations mapping (ACDC → AC/DC, ft. → feat.) - Artist name cleaning rules - Title cleaning patterns -- **NEW**: Database connection configuration -- **NEW**: Fuzzy search similarity thresholds +- Database connection configuration +- Fuzzy search similarity thresholds +- **NEW**: Collaboration detection patterns +- **NEW**: Band name protection list ## Security Considerations @@ -270,8 +333,8 @@ docker-compose logs -f musicbrainz - Local API calls only - No external network requests (except to local MusicBrainz server) - Input validation for JSON files -- **NEW**: Database credentials should be secured -- **NEW**: Connection timeout limits prevent hanging +- Database credentials should be secured +- Connection timeout limits prevent hanging ## Deployment @@ -279,7 +342,7 @@ docker-compose logs -f musicbrainz - Python 3.6+ - pip install requests psycopg2-binary fuzzywuzzy python-Levenshtein - MusicBrainz server running -- **NEW**: PostgreSQL database accessible +- PostgreSQL database accessible ### Installation ```bash @@ -303,14 +366,43 @@ python musicbrainz_cleaner.py --test-connection ## Maintenance ### Regular Tasks -- Update known artist/recording mappings +- Update name variations mapping - Monitor MusicBrainz API changes - Update dependencies as needed -- **NEW**: Monitor database performance -- **NEW**: Update fuzzy search thresholds based on usage +- Monitor database performance +- Update fuzzy search thresholds based on usage +- **NEW**: Review and update band name protection list +- **NEW**: Monitor collaboration detection accuracy ### Support - GitHub issues for bug reports - Documentation updates - User feedback integration -- **NEW**: Database connection troubleshooting guide \ No newline at end of file +- Database connection troubleshooting guide +- **NEW**: Collaboration detection troubleshooting guide + +## Lessons Learned + +### Database Integration +- **Direct PostgreSQL access is 10x 
faster** than API calls +- **Docker networking** requires container IPs, not localhost +- **Database name matters**: `musicbrainz_db` not `musicbrainz` +- **Static caches cause problems**: Wrong MBIDs override correct database lookups + +### Collaboration Handling +- **Primary patterns** (ft., feat.) are always collaborations +- **Secondary patterns** (&, and) require intelligence to distinguish from band names +- **Comma detection** helps identify collaborations +- **Artist credit lookup** is essential for preserving all collaborators + +### Edge Cases +- **Dash variations** (regular vs Unicode) cause exact match failures +- **Artist aliases** are common and important (98 Degrees → 98°) +- **Sort names** handle "Last, First" formats +- **Numerical suffixes** in names need special handling (S Club 7 → S Club) + +### Performance Optimization +- **Remove static caches** for better accuracy +- **Database-first approach** ensures live data +- **Fuzzy search thresholds** need tuning for different datasets +- **Connection pooling** would improve performance for large datasets \ No newline at end of file diff --git a/README.md b/README.md index 9cbd930..4c1f361 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,19 @@ -# 🎵 MusicBrainz Data Cleaner v2.0 +# 🎵 MusicBrainz Data Cleaner v3.0 -A powerful command-line tool that cleans and normalizes your song data using the MusicBrainz database. **Now with direct database access and fuzzy search for maximum performance and accuracy!** +A powerful command-line tool that cleans and normalizes your song data using the MusicBrainz database. 
**Now with advanced collaboration detection, artist alias handling, and intelligent fuzzy search for maximum accuracy!** -## ✨ What's New in v2.0 +## ✨ What's New in v3.0 - **🚀 Direct Database Access**: Connect directly to PostgreSQL for 10x faster performance -- **🎯 Fuzzy Search**: Intelligent matching for similar artist names and song titles +- **🎯 Advanced Fuzzy Search**: Intelligent matching for similar artist names and song titles - **🔄 Automatic Fallback**: Falls back to API mode if database access fails - **⚡ No Rate Limiting**: Database queries don't have API rate limits - **📊 Similarity Scoring**: See how well matches are scored +- **🆕 Collaboration Detection**: Intelligently handle complex collaborations like "Pitbull ft. Ne-Yo, Afrojack & Nayer" +- **🆕 Artist Aliases**: Handle name variations like "98 Degrees" → "98°" and "S Club 7" → "S Club" +- **🆕 Sort Names**: Handle "Last, First" formats like "Corby, Matt" → "Matt Corby" +- **🆕 Edge Case Handling**: Support for artists with hyphens, exclamation marks, numbers, and special characters +- **🆕 Band Name Protection**: Distinguish between band names (Simon & Garfunkel) and collaborations (Lavato, Demi & Joe Jonas) ## ✨ What It Does @@ -47,6 +52,9 @@ pip install requests psycopg2-binary fuzzywuzzy python-Levenshtein git clone https://github.com/metabrainz/musicbrainz-docker.git cd musicbrainz-docker +# Update postgres.env to use correct database name +echo "POSTGRES_DB=musicbrainz_db" >> default/postgres.env + # Start the server docker-compose up -d @@ -56,9 +64,9 @@ docker-compose logs -f musicbrainz #### Option B: Manual Setup 1. Install PostgreSQL 12+ -2. Create database: `createdb musicbrainz` +2. Create database: `createdb musicbrainz_db` 3. Import MusicBrainz data dump -4. Start MusicBrainz server on port 5001 +4. Start MusicBrainz server on port 8080 ### 3. Test Connection ```bash @@ -79,29 +87,31 @@ That's it! 
Your cleaned data will be saved to `your_songs_cleaned.json` ## 📋 Requirements - **Python 3.6+** -- **MusicBrainz Server** running on localhost:5001 +- **MusicBrainz Server** running on localhost:8080 - **PostgreSQL Database** accessible on localhost:5432 - **Dependencies**: `requests`, `psycopg2-binary`, `fuzzywuzzy`, `python-Levenshtein` ## 🔧 Server Configuration ### Database Access -- **Host**: localhost +- **Host**: localhost (or Docker container IP: 172.18.0.2) - **Port**: 5432 (PostgreSQL default) -- **Database**: musicbrainz +- **Database**: musicbrainz_db (actual database name) - **User**: musicbrainz - **Password**: musicbrainz (default, should be changed in production) ### HTTP API (Fallback) -- **URL**: http://localhost:5001 +- **URL**: http://localhost:8080 - **Endpoint**: /ws/2/ - **Format**: JSON ### Troubleshooting - **Database Connection Failed**: Check PostgreSQL is running and credentials are correct -- **API Connection Failed**: Check MusicBrainz server is running on port 5001 +- **API Connection Failed**: Check MusicBrainz server is running on port 8080 - **Slow Performance**: Ensure database indexes are built - **No Results**: Verify data has been imported to the database +- **NEW**: **Docker Networking**: Use container IP (172.18.0.2) for Docker-to-Docker connections +- **NEW**: **Database Name**: Ensure using `musicbrainz_db` not `musicbrainz` ## 🧪 Testing @@ -111,19 +121,31 @@ Run the test suite to verify everything works correctly: # Run all tests python3 src/tests/run_tests.py +# Run specific test categories +python3 src/tests/run_tests.py --unit # Unit tests only +python3 src/tests/run_tests.py --integration # Integration tests only + # Run specific test module python3 src/tests/run_tests.py test_data_loader python3 src/tests/run_tests.py test_cli + +# List all available tests +python3 src/tests/run_tests.py --list ``` +#### Test Categories +- **Unit Tests**: Test individual components in isolation +- **Integration Tests**: Test 
interactions between components and database +- **Debug Tests**: Debug scripts and troubleshooting tools + ## 📁 Data Files -The tool uses external JSON files for known artist and recording data: +The tool uses external JSON files for name variations: -- **`data/known_artists.json`**: Contains known artist MBIDs for common artists +- **`data/known_artists.json`**: Contains name variations (ACDC → AC/DC, ft. → feat.) - **`data/known_recordings.json`**: Contains known recording MBIDs for common songs -These files can be easily updated without touching the code, making it simple to add new artists and recordings. +These files can be easily updated without touching the code, making it simple to add new name variations. ## 🎯 Features @@ -131,6 +153,16 @@ These files can be easily updated without touching the code, making it simple to - `ACDC` → `AC/DC` - `Bruno Mars ft. Cardi B` → `Bruno Mars feat. Cardi B` - `featuring` → `feat.` +- `98 Degrees` → `98°` (artist aliases) +- `S Club 7` → `S Club` (numerical suffixes) +- `Corby, Matt` → `Matt Corby` (sort names) + +### ✅ Collaboration Detection +- **Primary Patterns**: "ft.", "feat.", "featuring" (always collaborations) +- **Secondary Patterns**: "&", "and", "," (intelligent detection) +- **Band Name Protection**: "Simon & Garfunkel" (not collaboration) +- **Complex Collaborations**: "Pitbull ft. Ne-Yo, Afrojack & Nayer" +- **Case Insensitive**: "Featuring" → "featuring" ### ✅ Song Title Fixes - `Shot In The Dark` → `Shot in the Dark` @@ -145,11 +177,20 @@ These files can be easily updated without touching the code, making it simple to - Keeps all your existing fields (guid, path, disabled, favorite, etc.) 
- Only adds new fields, never removes existing ones -### 🆕 Fuzzy Search +### 🆕 Advanced Fuzzy Search - **Intelligent Matching**: Finds similar names even with typos or variations - **Similarity Scoring**: Shows how well each match scores (0.0 to 1.0) - **Configurable Thresholds**: Adjust matching sensitivity - **Multiple Algorithms**: Uses ratio, partial ratio, and token sort matching +- **Enhanced Search Fields**: artist.name, artist_alias.name, artist.sort_name +- **Dash Handling**: Regular dash (-) vs Unicode dash (‐) +- **Substring Protection**: Avoids false matches like "Sleazy-E" vs "Eazy-E" + +### 🆕 Edge Case Support +- **Hyphenated Artists**: "Blink-182", "Ne-Yo", "G-Eazy" +- **Exclamation Marks**: "P!nk", "Panic! At The Disco", "3OH!3" +- **Numbers**: "98 Degrees", "S Club 7", "3 Doors Down" +- **Special Characters**: "a-ha", "The B-52s", "Salt-N-Pepa" ## 📖 Usage Examples @@ -283,19 +324,23 @@ Using database connection - The artist might not be in the MusicBrainz database - Try checking the spelling or using a different variation - The search index might still be building (wait a few minutes) -- **NEW**: Check fuzzy search similarity score - lower threshold if needed +- Check fuzzy search similarity score - lower threshold if needed +- **NEW**: Check for artist aliases (e.g., "98 Degrees" → "98°") +- **NEW**: Check for sort names (e.g., "Corby, Matt" → "Matt Corby") ### "Could not find recording" - The song might not be in the database - The title might not match exactly - Try a simpler title (remove extra words) -- **NEW**: Check fuzzy search similarity score - lower threshold if needed +- Check fuzzy search similarity score - lower threshold if needed +- **NEW**: For collaborations, check if it's stored under the main artist ### Connection errors - **Database**: Make sure PostgreSQL is running and accessible -- **API**: Make sure your MusicBrainz server is running on `http://localhost:5001` +- **API**: Make sure your MusicBrainz server is running on 
`http://localhost:8080` - Check that Docker containers are up and running - Verify the server is accessible in your browser +- **NEW**: For Docker, use container IP (172.18.0.2) instead of localhost ### JSON errors - Make sure your input file is valid JSON @@ -303,9 +348,14 @@ Using database connection - Verify all required fields are present ### Performance issues -- **NEW**: Use database mode instead of API mode for better performance -- **NEW**: Ensure database indexes are built for faster queries -- **NEW**: Check fuzzy search thresholds - higher thresholds mean fewer but more accurate matches +- Use database mode instead of API mode for better performance +- Ensure database indexes are built for faster queries +- Check fuzzy search thresholds - higher thresholds mean fewer but more accurate matches + +### Collaboration detection issues +- **NEW**: Check if it's a band name vs collaboration (e.g., "Simon & Garfunkel" vs "Lavato, Demi & Joe Jonas") +- **NEW**: Verify the collaboration pattern is supported (ft., feat., featuring, &, and, ,) +- **NEW**: Check case sensitivity - patterns are case-insensitive ## 🎯 Use Cases @@ -314,6 +364,8 @@ Using database connection - **Music Apps**: Ensure consistent data across your application - **Data Migration**: Clean up legacy music data when moving to new systems - **Fuzzy Matching**: Handle typos and variations in artist/song names +- **NEW**: **Collaboration Handling**: Process complex artist collaborations +- **NEW**: **Edge Cases**: Handle artists with special characters and unusual names ## 📚 What are MBIDs? @@ -332,6 +384,28 @@ Using database connection | **Database** | ⚡ 10x faster | ❌ None | ✅ Yes | 🔧 Medium | | **API** | 🐌 Slower | ⏱️ Yes (0.1s delay) | ❌ No | ✅ Easy | +## 🆕 Collaboration Detection Examples + +| Input | Type | Detection | Output | +|-------|------|-----------|---------| +| `Bruno Mars ft. Cardi B` | Collaboration | ✅ Primary pattern | `Bruno Mars feat. Cardi B` | +| `Pitbull ft. 
Ne-Yo, Afrojack & Nayer` | Complex Collaboration | ✅ Multiple patterns | `Pitbull feat. Ne-Yo, Afrojack & Nayer` | +| `Simon & Garfunkel` | Band Name | ❌ Protected | `Simon & Garfunkel` | +| `Lavato, Demi & Joe Jonas` | Collaboration | ✅ Comma detection | `Lavato, Demi & Joe Jonas` | +| `Hall & Oates` | Band Name | ❌ Protected | `Hall & Oates` | + +## 🆕 Edge Case Examples + +| Input | Type | Handling | Output | +|-------|------|----------|---------| +| `ACDC` | Name Variation | ✅ Alias lookup | `AC/DC` | +| `98 Degrees` | Artist Alias | ✅ Alias search | `98°` | +| `S Club 7` | Numerical Suffix | ✅ Suffix removal | `S Club` | +| `Corby, Matt` | Sort Name | ✅ Sort name search | `Matt Corby` | +| `Blink-182` | Dash Variation | ✅ Unicode dash handling | `blink‐182` | +| `P!nk` | Special Characters | ✅ Direct search | `P!nk` | +| `3OH!3` | Numbers + Special | ✅ Direct search | `3OH!3` | + ## 🤝 Contributing Found a bug or have a feature request? @@ -351,6 +425,32 @@ This tool is provided as-is for educational and personal use. - [MusicBrainz Docker](https://github.com/metabrainz/musicbrainz-docker) - Docker setup - [FuzzyWuzzy](https://github.com/seatgeek/fuzzywuzzy) - Fuzzy string matching library +## 📝 Lessons Learned + +### Database Integration +- **Direct PostgreSQL access is 10x faster** than API calls +- **Docker networking** requires container IPs, not localhost +- **Database name matters**: `musicbrainz_db` not `musicbrainz` +- **Static caches cause problems**: Wrong MBIDs override correct database lookups + +### Collaboration Handling +- **Primary patterns** (ft., feat.) 
are always collaborations +- **Secondary patterns** (&, and) require intelligence to distinguish from band names +- **Comma detection** helps identify collaborations +- **Artist credit lookup** is essential for preserving all collaborators + +### Edge Cases +- **Dash variations** (regular vs Unicode) cause exact match failures +- **Artist aliases** are common and important (98 Degrees → 98°) +- **Sort names** handle "Last, First" formats +- **Numerical suffixes** in names need special handling (S Club 7 → S Club) + +### Performance Optimization +- **Remove static caches** for better accuracy +- **Database-first approach** ensures live data +- **Fuzzy search thresholds** need tuning for different datasets +- **Connection pooling** would improve performance for large datasets + --- **Happy cleaning! 🎵✨** \ No newline at end of file diff --git a/data/known_artists.json b/data/known_artists.json index 1dbcaf7..c6b3271 100644 --- a/data/known_artists.json +++ b/data/known_artists.json @@ -1,26 +1,4 @@ { - "artists": { - "ACDC": "66c662b6-6e2f-4930-8610-912e24c63ed1", - "AC/DC": "66c662b6-6e2f-4930-8610-912e24c63ed1", - "Bruno Mars": "afb680f2-b6eb-4cd7-a70b-a63b25c763d5", - "Taylor Swift": "20244d07-534f-4eff-b4d4-930878889970", - "Cardi B": "2f3c4d70-0462-40da-bba3-0aec5772c556", - "Afrojack": "a3ee920f-4e7f-4993-8aca-4b8538cfaa4a", - "Chris Brown": "c234fa41-e2a3-4492-8fb8-810e88c7404b", - "Andy Grammer": "c8c1b1c1-5c1c-4c1c-8c1c-1c1c1c1c1c1c", - "Avicii": "c8c1b1c1-5c1c-4c1c-8c1c-1c1c1c1c1c1c", - "Nicky Romero": "c8c1b1c1-5c1c-4c1c-8c1c-1c1c1c1c1c1c", - "Bastille": "c8c1b1c1-5c1c-4c1c-8c1c-1c1c1c1c1c1c", - "Big Sean": "c8c1b1c1-5c1c-4c1c-8c1c-1c1c1c1c1c1c", - "E40": "c8c1b1c1-5c1c-4c1c-8c1c-1c1c1c1c1c1c", - "Blake Shelton": "c8c1b1c1-5c1c-4c1c-8c1c-1c1c1c1c1c1c", - "Gwen Stefani": "c8c1b1c1-5c1c-4c1c-8c1c-1c1c1c1c1c1c", - "Bradley Cooper": "c8c1b1c1-5c1c-4c1c-8c1c-1c1c1c1c1c1c", - "Coldplay": "c8c1b1c1-5c1c-4c1c-8c1c-1c1c1c1c1c1c", - "Drake": 
"c8c1b1c1-5c1c-4c1c-8c1c-1c1c1c1c1c1c", - "Frank Sinatra": "c8c1b1c1-5c1c-4c1c-8c1c-1c1c1c1c1c1c", - "Garth Brooks": "c8c1b1c1-5c1c-4c1c-8c1c-1c1c1c1c1c1c" - }, "name_variations": { "ACDC": "AC/DC", "ft.": "feat.", @@ -28,8 +6,9 @@ "featuring": "feat." }, "metadata": { - "description": "Known artist MBIDs for common artists", - "last_updated": "2024-01-01", - "version": "1.0" + "description": "Name variations for artist name normalization", + "last_updated": "2024-12-19", + "version": "3.0", + "note": "Removed artists section - now using database search instead" } } \ No newline at end of file diff --git a/data/songs.json b/data/songs.json index 82734b6..b6daf9d 100755 --- a/data/songs.json +++ b/data/songs.json @@ -169868,7 +169868,7 @@ "title": "Girl Crush" }, { - "artist": "Little M!x", + "artist": "Little Mix", "disabled": false, "favorite": false, "genre": "Karaoke", @@ -169877,7 +169877,7 @@ "title": "How Ya Doin'" }, { - "artist": "Little M!x", + "artist": "Little Mix", "disabled": false, "favorite": false, "genre": "Karaoke", @@ -358629,7 +358629,7 @@ "title": "Green Garden" }, { - "artist": "Little M!x", + "artist": "Little Mix", "disabled": false, "favorite": false, "guid": "c8a57b4d-070d-0569-f178-edc96ff7b64f", diff --git a/data/songs_100_random.json b/data/songs_100_random.json new file mode 100644 index 0000000..b9563d1 --- /dev/null +++ b/data/songs_100_random.json @@ -0,0 +1,835 @@ +[ + { + "artist": "One Night Only", + "disabled": false, + "favorite": false, + "guid": "c789a741-923a-8c8f-1a5a-1fdb7399f12b", + "path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF265\\One Night Only - Just For Tonight - SF265 - 14.mp3", + "title": "Just For Tonight" + }, + { + "artist": "Owl City", + "disabled": false, + "favorite": false, + "guid": "65643569-49f5-8cca-aba0-939a52bc462f", + "path": "z://MP4\\KaraFun Karaoke\\Karaoke Vanilla Twilight - Owl City.mp4", + "title": "Vanilla Twilight" + }, + { + "artist": "Big Mountain", + "disabled": false, + "favorite": 
false, + "guid": "89b50784-1b62-ee66-1dee-0619a59c0c1c", + "path": "z://MP4\\KaraFun Karaoke\\Karaoke Baby, I Love Your Way - Big Mountain.mp4", + "title": "Baby, I Love Your Way" + }, + { + "artist": "Steppenwolf", + "disabled": false, + "favorite": false, + "guid": "d2b41e7d-f96c-0907-ce25-920dd91fb921", + "path": "z://CDG\\Various\\Steppenwolf - Born To Be Wild (3).mp3", + "title": "Born To Be Wild (3)" + }, + { + "artist": "Beyonc\u00e9", + "disabled": false, + "favorite": false, + "guid": "ac14f3d1-ff20-3ac1-0191-905de8a99877", + "path": "z://MP4\\Sing King Karaoke\\Beyonc\u00e9 - 7 11 (Karaoke Version).mp4", + "title": "7 11" + }, + { + "artist": "Foreigner", + "disabled": false, + "favorite": false, + "guid": "9bf3fabf-6957-4241-dd1c-6dba29d83655", + "path": "z://MP4\\Let's Sing Karaoke\\Foreigner - I Want To Know What Love Is (Karaoke & Lyrics) (2).mp4", + "title": "I Want To Know What Love Is" + }, + { + "artist": "Liam Gallagher", + "disabled": false, + "favorite": false, + "guid": "e874f895-e946-ef4d-354e-8c33af50be71", + "path": "z://MP4\\KtvEntertainment\\Liam Gallagher - For What It's Worth Karaoke.mp4", + "title": "For What It's Worth" + }, + { + "artist": "George Michael", + "disabled": false, + "favorite": false, + "guid": "459f42c2-9aa4-8eac-e224-b01c5dd23f92", + "path": "z://MP4\\Karaoke Sing Sing\\George Michael - Careless Whisper (Karaoke Version).mp4", + "title": "Careless Whisper" + }, + { + "artist": "Queens Of The Stone Age", + "disabled": false, + "favorite": false, + "guid": "c5a2429d-c54d-f982-4bf3-2eb7eeb1cead", + "path": "z://CDG\\Various\\Queens Of The Stone Age - Little Sister.mp3", + "title": "Little Sister" + }, + { + "artist": "Rocketman", + "disabled": false, + "favorite": false, + "guid": "6a3c0389-9b7b-b6aa-a3eb-1cd8b0524f12", + "path": "z://MP4\\KaraFun Karaoke\\Karaoke The Bitch is Back - Rocketman.mp4", + "title": "The Bitch is Back" + }, + { + "artist": "Bobby Vee", + "disabled": false, + "favorite": false, + "genre": 
"Karaoke", + "guid": "fa4dea55-647c-5efa-534f-f0bcbbd5414b", + "path": "z://CDG\\Sunfly Collection\\Sunfly Most Wanted\\SFMW 813\\Bobby Vee - Run To Him - SFMW 813 -05.mp3", + "title": "Run To Him" + }, + { + "artist": "Sheryl Crow", + "disabled": false, + "favorite": false, + "guid": "029cdcce-6813-5b7f-8a60-ebf522dc4c77", + "path": "z://CDG\\Various\\Sheryl Crow - Run Baby Run.mp3", + "title": "Run Baby Run" + }, + { + "artist": "Lady Gaga", + "disabled": false, + "favorite": false, + "guid": "47bb91da-fc18-b97d-72b1-32ca1d847bd5", + "path": "z://CDG\\Mr Entertainer\\MRTC01\\MRTC01-04\\MRTC01-4-09 - Lady Gaga - Poker Face.mp3", + "title": "Poker Face" + }, + { + "artist": "Cliff Richard", + "disabled": false, + "favorite": false, + "guid": "9586c5d0-538b-8351-2f87-9bee5e6ce827", + "path": "z://MP4\\KaraFun Karaoke\\Karaoke Rave On - Cliff Richard.mp4", + "title": "Rave On" + }, + { + "artist": "Glen Campbell", + "disabled": false, + "favorite": false, + "guid": "0734fdc9-73a6-842c-14f6-36f643c12e99", + "path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF051\\Glen Campbell - Rhinestone Cowboy - SF051 - 15.mp3", + "title": "Rhinestone Cowboy" + }, + { + "artist": "Chesney Hawkes", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "40f504ff-dd2e-5ebe-78ce-c7445402c30a", + "path": "z://MP4\\ZoomKaraokeOfficial\\Chesney Hawkes - The One And Only.mp4", + "title": "The One And Only" + }, + { + "artist": "Tony Orlando & Dawn", + "disabled": false, + "favorite": false, + "guid": "37b41627-6786-8bc9-b340-21561bb0a141", + "path": "z://MP4\\KaraFun Karaoke\\Karaoke Candida - Tony Orlando And Dawn.mp4", + "title": "Candida" + }, + { + "artist": "Judas Priest", + "disabled": false, + "favorite": false, + "guid": "d51e1ee3-76b1-4b64-722f-c09b7678ed10", + "path": "z://MP4\\KaraFun Karaoke\\Karaoke Breaking The Law - Judas Priest.mp4", + "title": "Breaking The Law" + }, + { + "artist": "America", + "disabled": false, + "favorite": false, + "guid": 
"f92d9498-09c4-a2fa-9077-5cae27303065", + "path": "z://MP4\\KaraFun Karaoke\\Karaoke Sister Golden Hair - America.mp4", + "title": "Sister Golden Hair" + }, + { + "artist": "Gerry Guthrie", + "disabled": false, + "favorite": false, + "guid": "10272677-9a07-cdaa-0017-4fe201873ccf", + "path": "z://MP4\\KtvEntertainment\\Gerry Guthrie - Did She Mention My Name Karaoke Lyrics.mp4", + "title": "Did She Mention My Name" + }, + { + "artist": "The Allman Brothers Band", + "disabled": false, + "favorite": false, + "guid": "87bb825e-7f8c-6cef-40b1-dc2fac930e79", + "path": "z://MP4\\KtvEntertainment\\The Allman Brothers Band - Whipping Post Karaoke Lyrics.mp4", + "title": "Whipping Post" + }, + { + "artist": "The Beach Boys", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "b8d51214-0c90-d4ce-928f-b3e2cc1080c6", + "path": "z://MP4\\ZoomKaraokeOfficial\\The Beach Boys - Sloop John B (Cool Vocal Only Ending).mp4", + "title": "Sloop John B (Cool Vocal Only Ending)" + }, + { + "artist": "Floaters", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "0b7e8b42-0f80-96b0-c0c7-3f7ddf0c8150", + "path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF103\\Floaters - Float On - SF103 - 04.mp3", + "title": "Float On" + }, + { + "artist": "Afroman", + "disabled": false, + "favorite": false, + "guid": "3076eef1-f794-bd75-5d10-1e90139609d0", + "path": "z://MP4\\KaraFun Karaoke\\Karaoke Because I Got High - Afroman.mp4", + "title": "Because I Got High" + }, + { + "artist": "Lavato, Demi & Joe Jonas", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "7c1b42d7-14e4-2f6d-40ba-fa41a217c0ab", + "path": "z://CDG\\SBI\\SBI-05\\Lavato, Demi & Joe Jonas - This Is Me.mp3", + "title": "This Is Me" + }, + { + "artist": "Drake", + "disabled": false, + "favorite": false, + "guid": "2db9fd74-3353-0fb2-a289-666ad6e1020a", + "path": "z://MP4\\King of Karaoke\\Drake - Passionfruit - King of Karaoke.mp4", + "title": "Passionfruit" + }, + { + 
"artist": "Elvis Presley", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "b13a7a30-c5ee-a701-348f-d79a36e18b8e", + "path": "z://CDG\\Various\\Elvis Presley - Where Do You Come From.mp3", + "title": "Where Do You Come From" + }, + { + "artist": "Coldplay", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "7820a2af-e4fa-f3a2-d070-46ad73e08ace", + "path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF308\\Coldplay - Paradise - SF308 - 11.mp3", + "title": "Paradise" + }, + { + "artist": "Belinda Carlisle", + "disabled": false, + "favorite": false, + "guid": "9293d245-f376-b2c8-a268-28d8dab3161f", + "path": "z://CDG\\Various\\Belinda Carlisle - Heaven Is A Place On Earth.mp3", + "title": "Heaven Is A Place On Earth" + }, + { + "artist": "Glee", + "disabled": false, + "favorite": false, + "guid": "ee583fec-9afb-1e58-c2a4-6b01d4241cdc", + "path": "z://CDG\\Mr Entertainer\\MRTC01\\MRTC01-02\\MRTC01-2-13 - Glee Cast - Halo+walking On Sunshine.mp3", + "title": "Halo+walking On Sunshine" + }, + { + "artist": "Calvin Harris ft. 
Florence Welch", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "2a209afa-e7f9-ea64-c928-7f14499d9756", + "path": "z://MP4\\ZoomKaraokeOfficial\\Calvin Harris feat Florence Welch - Sweet Nothing.mp4", + "title": "Sweet Nothing" + }, + { + "artist": "Jim Croce", + "disabled": false, + "favorite": false, + "guid": "ceda7d43-a35d-1f1c-81d2-7ff6eb8c9848", + "path": "z://MP4\\KtvEntertainment\\Jim Croce - Lover's Cross Karaoke Lyrics.mp4", + "title": "Lover's Cross" + }, + { + "artist": "Herman's Hermits", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "120fb0a5-e36e-1d0f-1acd-80ecadc8ec8b", + "path": "z://MP4\\ZoomKaraokeOfficial\\Hermans Hermits - Somethings Happening.mp4", + "title": "Something's Happening" + }, + { + "artist": "The Everly Brothers", + "disabled": false, + "favorite": false, + "guid": "08ead07b-6164-e9da-b203-554de875461b", + "path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF045\\The Everly Brothers - Price Of Love, The - SF045 - 11.mp3", + "title": "Price Of Love, The" + }, + { + "artist": "Bring Me The Horizon", + "disabled": false, + "favorite": false, + "guid": "c0382fef-4a55-7cc0-54a7-12b9bf3b4198", + "path": "z://MP4\\Sing King Karaoke\\Bring Me The Horizon - Can You Feel My Heart (Karaoke Version).mp4", + "title": "Can You Feel My Heart" + }, + { + "artist": "Van Halen", + "disabled": false, + "favorite": false, + "guid": "d6ebdbb5-a61a-0fac-84d4-62f335abcaca", + "path": "z://MP4\\CerealKillerKaraoke\\CKK-VR - Van Halen - Dirty Movies (Karaoke).mp4", + "title": "Dirty Movies" + }, + { + "artist": "Chris Brown Featuring T-Pain", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "b0329fd3-dadd-bee1-8807-ae090b3a1b64", + "path": "z://MP4\\KaraokeOnVEVO\\Chris Brown Featuring T-Pain - Kiss Kiss.mp4", + "title": "Kiss Kiss" + }, + { + "artist": "Andy Griggs", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": 
"19fca440-e6ba-b5d0-764c-395cda15ad68", + "path": "z://CDG\\Various\\Andy Griggs - You Wont Ever Be Lonely.mp3", + "title": "You Wont Ever Be Lonely" + }, + { + "artist": "Chris Norman", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "41c56192-4929-4742-77b3-9d823ebf8fe7", + "path": "z://MP4\\ZoomKaraokeOfficial\\Chris Norman - The Girl I Love.mp4", + "title": "The Girl I Love" + }, + { + "artist": "Ruth B", + "disabled": false, + "favorite": false, + "guid": "0848db87-6f59-ebdf-1cdd-00b452b40b34", + "path": "z://MP4\\Sing King Karaoke\\Ruth B - Golden (Karaoke Version).mp4", + "title": "Golden" + }, + { + "artist": "boygenius", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "1653c073-cdbb-4256-71c5-09ec5185f37d", + "path": "z://MP4\\Sing King Karaoke\\boygenius - Emily Im Sorry.mp4", + "title": "Emily I'm Sorry" + }, + { + "artist": "Blue Mink", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "4fce614f-7a59-50bd-737f-db5da614abb9", + "path": "z://MP4\\KaraokeOnVEVO\\Blue Mink - Melting Pot.mp4", + "title": "Melting Pot" + }, + { + "artist": "Dean Martin", + "disabled": false, + "favorite": false, + "guid": "3717fe66-95b4-6fec-36d5-fca62e135421", + "path": "z://MP4\\KtvEntertainment\\Dean Martin - Non Dimenticar Karaoke Lyrics.mp4", + "title": "Non Dimenticar" + }, + { + "artist": "Queen", + "disabled": false, + "favorite": false, + "guid": "c2427066-b8b8-90bf-2c8a-47a9ec7e1096", + "path": "z://MP4\\Sing King Karaoke\\Queen - Somebody To Love (Karaoke Version).mp4", + "title": "Somebody To Love" + }, + { + "artist": "Dionne Warwick", + "disabled": false, + "favorite": false, + "guid": "3c5506b7-6980-d7af-efc2-f0476d74cada", + "path": "z://MP4\\KaraFun Karaoke\\Karaoke That s What Friends Are For - Dionne Warwick.mp4", + "title": "That s What Friends Are For" + }, + { + "artist": "The Isley Brothers", + "disabled": false, + "favorite": false, + "guid": 
"f99facc0-a093-e0e7-6faf-f332e2067da4", + "path": "z://MP4\\KaraFun Karaoke\\Karaoke Shout - The Isley Brothers.mp4", + "title": "Shout" + }, + { + "artist": "Olivia Rodrigo", + "disabled": false, + "favorite": false, + "guid": "7b651ed4-04ed-24f4-817e-d75a308594e9", + "path": "z://MP4\\Stingray Karaoke\\Olivia Rodrigo - get him back! (Karaoke Version).mp4", + "title": "get him back!" + }, + { + "artist": "The Kentucky Headhunters", + "disabled": false, + "favorite": false, + "guid": "6eaccc40-e1bf-b1de-23c4-cdaa7aad730c", + "path": "z://MP4\\KaraFun Karaoke\\Karaoke Spirit In The Sky - The Kentucky Headhunters.mp4", + "title": "Spirit In The Sky" + }, + { + "artist": "Yebba", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "08c840ce-8b80-4856-6132-8d7bf9a357e9", + "path": "z://MP4\\Sing King Karaoke\\Yebba - My Mind Karaoke Version).mp4", + "title": "My Mind Karaoke Version)" + }, + { + "artist": "Shawn Mendes", + "disabled": false, + "favorite": false, + "guid": "9722f3a0-e17f-1df6-cebe-516a903530dc", + "path": "z://MP4\\Sing King Karaoke\\Shawn Mendes - Where Were You In The Morning (Karaoke Version).mp4", + "title": "Where Were You In The Morning" + }, + { + "artist": "Abc", + "disabled": false, + "favorite": false, + "guid": "511ea86b-d81a-257f-ffc2-6ab2453df440", + "path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF111\\Abc - The Look Of Love - SF111 - 02.mp3", + "title": "The Look Of Love" + }, + { + "artist": "Darryl Worley", + "disabled": false, + "favorite": false, + "guid": "b3422ad6-01de-00aa-84a4-e2efa46e0c98", + "path": "z://MP4\\Let's Sing Karaoke\\Worley, Darryl - I Miss My Friend (Karaoke & Lyrics) (2).mp4", + "title": "I Miss My Friend" + }, + { + "artist": "Peggy Lee", + "disabled": false, + "favorite": false, + "guid": "ff39f28a-d933-6c86-72e8-aecc4eb0a844", + "path": "z://MP4\\KaraFun Karaoke\\Karaoke Fever - Peggy Lee.mp4", + "title": "Fever" + }, + { + "artist": "Lorrie Morgan", + "disabled": false, + "favorite": 
false, + "guid": "a88ab5a7-ec7c-22ae-4b8a-3f4701aeff04", + "path": "z://CDG\\Various\\Lorrie Morgan - Dear Me.mp3", + "title": "Dear Me" + }, + { + "artist": "Kacey Musgraves", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "39ad7a92-fb3c-6874-85c6-5150ae52415b", + "path": "z://MP4\\ZoomKaraokeOfficial\\Kacey Musgraves - Merry Go Round.mp4", + "title": "Merry Go 'Round" + }, + { + "artist": "Rihanna ft. Mikky Ekko", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "3a3bd95f-f573-d3c2-ac2d-5b75fba98f14", + "path": "z://CDG\\Big Hits Karaoke\\BHK039\\BH039-04 - Rihanna & Mikky Ekko - Stay.mp3", + "title": "Stay" + }, + { + "artist": "Luciano Pavarotti", + "disabled": false, + "favorite": false, + "guid": "48ffd3c9-3fb9-be69-4e0e-d4974144c527", + "path": "z://MP4\\KaraFun Karaoke\\O Sole Mio - Luciano Pavarotti Karaoke Version KaraFun.mp4", + "title": "O Sole Mio" + }, + { + "artist": "David Bowie", + "disabled": false, + "favorite": false, + "guid": "10294c23-dee6-1e13-5bfc-090d5ed88c4a", + "path": "z://MP4\\Stingray Karaoke\\Let's Dance in the Style of David Bowie karaoke video with lyrics (no lead vocal).mp4", + "title": "Let's Dance" + }, + { + "artist": "Smokie", + "disabled": false, + "favorite": false, + "guid": "d2dc1db0-ae19-a8a7-42e4-f19ab155050b", + "path": "z://MP4\\KaraFun Karaoke\\Karaoke Needles and Pins - Smokie.mp4", + "title": "Needles and Pins" + }, + { + "artist": "LMFAO", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "ed6d018b-9a55-b039-8d0b-78568428f787", + "path": "z://CDG\\SBI\\SBI-03\\SB24973 - LMFAO - Sexy And I Know It.mp3", + "title": "Sexy And I Know It" + }, + { + "artist": "Elvis Presley", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "a54bec5e-5898-4b70-cb08-475ff77aa301", + "path": "z://MP4\\Sing King Karaoke\\Elvis Presley - Always on My Mind.mp4", + "title": "Always on My Mind" + }, + { + "artist": "Gabrielle", + "disabled": 
false, + "favorite": false, + "guid": "da575355-f72f-6934-0a34-fa54c37a8233", + "path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF218\\Gabrielle - Stay The Same - SF218 - 12.mp3", + "title": "Stay The Same" + }, + { + "artist": "Destiny's Child", + "disabled": false, + "favorite": false, + "guid": "24f3fb48-ea83-12c1-f998-cfbb917ab927", + "path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF177\\Destiny's Child - Survivor - SF177 - 03.mp3", + "title": "Survivor" + }, + { + "artist": "Sabrina Carpenter", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "795b7f81-f7a8-c3a7-4888-facd8b7f61a9", + "path": "z://MP4\\sing2karaoke\\Sabrina Carpenter - Busy Woman.mp4", + "title": "Busy Woman" + }, + { + "artist": "The Goodies", + "disabled": false, + "favorite": false, + "guid": "f24004dd-c2fa-2e3e-270b-cef6d1cfa1de", + "path": "z://CDG\\Sunfly Collection\\Sunfly Gold\\SFGD041 - Monty Python & The Goodies\\The Goodies - Make A Daft Noise For Christmas - SFG041 - 09.mp3", + "title": "Make A Daft Noise For Christmas" + }, + { + "artist": "Tony! Toni! 
Tone!", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "3fc77353-14b6-c1e1-a746-6547cdbbceed", + "path": "z://MP4\\KaraokeOnVEVO\\Tony Toni Tone - Feels Good.mp4", + "title": "Feels Good" + }, + { + "artist": "Bruce Springsteen", + "disabled": false, + "favorite": false, + "guid": "0aa63bcd-0d08-82d6-87a2-9f5421be75d2", + "path": "z://CDG\\Various\\Bruce Springsteen - Cover Me.mp3", + "title": "Cover Me" + }, + { + "artist": "Sing", + "disabled": false, + "favorite": false, + "guid": "e436056b-893c-d7e1-651a-2b26e1758b93", + "path": "z://MP4\\KaraFun Karaoke\\Karaoke Let s Face the Music and Dance - Sing.mp4", + "title": "Let s Face the Music and Dance" + }, + { + "artist": "Sublime", + "disabled": false, + "favorite": false, + "guid": "0e06e44b-da10-6067-f471-ea9c3b3a3e7c", + "path": "z://MP4\\KaraFun Karaoke\\Karaoke Garden Grove - Sublime.mp4", + "title": "Garden Grove" + }, + { + "artist": "Kelly Clarkson", + "disabled": false, + "favorite": false, + "genre": "karaoke", + "guid": "bc777abe-f7da-5395-5798-a61b28ebf000", + "path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF349\\Kelly Clarkson - Heartbeat Song - SF301 - 09.mp3", + "title": "Heartbeat Song" + }, + { + "artist": "Olly Murs", + "disabled": false, + "favorite": false, + "guid": "608b9fe0-d60b-aba6-b020-9a015838fdf2", + "path": "z://MP4\\KtvEntertainment\\Olly Murs - You Don't Know Love Karaoke Lyrics.mp4", + "title": "You Don't Know Love" + }, + { + "artist": "Doechii", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "7795470f-0410-b8c4-c871-2bd31edf1a36", + "path": "z://MP4\\Sing King Karaoke\\Doechii - Anxiety.mp4", + "title": "Anxiety" + }, + { + "artist": "Florence + The Machine", + "disabled": false, + "favorite": false, + "guid": "20967d47-9855-3bed-a442-24e91b4ffdd7", + "path": "z://MP4\\KaraFun Karaoke\\Karaoke No Light, No Light - Florence + The Machine.mp4", + "title": "No Light, No Light" + }, + { + "artist": "The Beatles", + "disabled": 
false, + "favorite": false, + "guid": "96a11cc6-acb8-8ff7-cecb-7e8cde85e062", + "path": "z://MP4\\KtvEntertainment\\The Beatles - P.S. I Love You (Karaoke without Vocal).mp4", + "title": "P.S. I Love You" + }, + { + "artist": "Mickey Gilley", + "disabled": false, + "favorite": false, + "guid": "6f831da9-1d07-ac40-8875-16981af0b351", + "path": "z://MP4\\KaraFun Karaoke\\Karaoke Talk To Me - Mickey Gilley.mp4", + "title": "Talk To Me" + }, + { + "artist": "Sech ft. Darrell, Nicky Jam, Ozuna & Anuel AA", + "disabled": false, + "favorite": false, + "guid": "dbc38404-6328-9e9a-f82b-09b797d29e00", + "path": "z://MP4\\Stingray Karaoke\\Otro trago Sech feat. Darrell, Nicky Jam, Ozuna & Anuel AA Karaoke with Lyrics.mp4", + "title": "Otro trago" + }, + { + "artist": "Boyzone", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "c785c4a5-8b97-a40c-c163-c8be70c7ed1f", + "path": "z://MP4\\Sing King Karaoke\\Boyzone - Baby Can I Hold You Tonight.mp4", + "title": "Baby Can I Hold You Tonight" + }, + { + "artist": "Jeannie C Riley", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "3ba6129d-ac04-b405-044d-8b453f565165", + "path": "z://MP4\\ZoomKaraokeOfficial\\Jeannie C Riley - Harper Valley PTA Karaoke Version from Zoom Karaoke.mp4", + "title": "Harper Valley P.T.A. 
- Karaoke Version from Zoom Karaoke" + }, + { + "artist": "Razorlight", + "disabled": false, + "favorite": false, + "guid": "e28f0e1f-ea58-7f26-cbb3-40b0c3f538b2", + "path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF248\\Razorlight - America - SF248 - 06.mp3", + "title": "America" + }, + { + "artist": "Sex Pistols", + "disabled": false, + "favorite": false, + "guid": "144ff474-2a6f-b72a-5b1f-363616437767", + "path": "z://MP4\\singsongsmusic\\God Save the Queen - Karaoke HD (In the style of Sex Pistols).mp4", + "title": "God Save the Queen" + }, + { + "artist": "Dierks Bentley", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "7b438180-6926-b78f-7559-6c921a75af62", + "path": "z://MP4\\KaraokeOnVEVO\\Dierks Bentley - Free And Easy (Down The Road I Go.mp4", + "title": "Free And Easy (Down The Road I Go)" + }, + { + "artist": "Iggy Azalea", + "disabled": false, + "favorite": false, + "guid": "77170d20-a51a-1ec3-5f8d-555d299db97d", + "path": "z://MP4\\singsongsmusic\\Team - Karaoke HD (In the style of Iggy Azalea).mp4", + "title": "Team" + }, + { + "artist": "Troye Sivan", + "disabled": false, + "favorite": false, + "guid": "1c2f1901-0be9-4ab9-f012-6a91b99ae86a", + "path": "z://MP4\\Sing King Karaoke\\Troye Sivan - WILD (Karaoke Version).mp4", + "title": "WILD" + }, + { + "artist": "Adam Lambert", + "disabled": false, + "favorite": false, + "guid": "91acab7b-0884-6348-f30a-fb4d50899665", + "path": "z://MP4\\KtvEntertainment\\Adam Lambert - Another Lonely Night Karaoke Lyrics.mp4", + "title": "Another Lonely Night" + }, + { + "artist": "Big Shaq", + "disabled": false, + "favorite": false, + "genre": "karaoke", + "guid": "f8804acb-a48f-ba25-5422-00535521a9b0", + "path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF383\\SF383-03 - Big Shaq - Man's Not Hot.mp3", + "title": "Man's Not Hot" + }, + { + "artist": "Air Supply", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "bf25b9e1-d366-5b61-aa82-cbb35bcf52a4", + "path": 
"z://MP4\\Sing King Karaoke\\Air Supply - Making Love Out Of Nothing At All.mp4", + "title": "Making Love Out Of Nothing At All" + }, + { + "artist": "Billy Joel", + "disabled": false, + "favorite": false, + "guid": "93c5c387-b76a-6105-19e4-cf7db9d269e3", + "path": "z://CDG\\Various\\Billy Joel - Entertainer.mp3", + "title": "Entertainer" + }, + { + "artist": "Faith Hill", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "5063126b-718f-fe62-ec89-2c701f43ccf8", + "path": "z://CDG\\Sunfly Collection\\Sunfly Most Wanted\\SFMW 844\\Faith Hill - Where Are You Christmas -SFMW 844 -12.mp3", + "title": "Where Are You Christmas" + }, + { + "artist": "Nicki Minaj Ft.drake", + "disabled": false, + "favorite": false, + "guid": "49a1a33d-799b-bbfd-1875-3c551424f75a", + "path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF301\\Nicki Minaj Ft.drake - Moment 4 Life (clean) - SF301 - 13.mp3", + "title": "Moment 4 Life (clean)" + }, + { + "artist": "P!nk", + "disabled": false, + "favorite": false, + "guid": "3c330302-071d-5a31-b34b-5cb38c12adf1", + "path": "z://MP4\\KaraFun Karaoke\\Karaoke Long Way To Happy - Pink.mp4", + "title": "Long Way To Happy" + }, + { + "artist": "Bryan Ferry", + "disabled": false, + "favorite": false, + "guid": "9ccab14f-6ce8-85f6-84eb-c2ee893e3a5d", + "path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF135\\Bryan Ferry - Let's Stick Together - SF135 - 05.mp3", + "title": "Let's Stick Together" + }, + { + "artist": "Snow Patrol", + "disabled": false, + "favorite": false, + "guid": "18b21f83-2c41-f548-3407-a891eb1885a0", + "path": "z://CDG\\Sunfly Collection\\Sunfly Hits\\SF245\\Snow Patrol - Chasing Cars - SF245 - 06.mp3", + "title": "Chasing Cars" + }, + { + "artist": "Janet Jackson", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "3952c8ba-e218-ea19-be21-e42791836cb7", + "path": "z://MP4\\KaraokeOnVEVO\\Janet Jackson - Nasty.mp4", + "title": "Nasty" + }, + { + "artist": "Corby, Matt", + "disabled": 
false, + "favorite": false, + "guid": "2f699149-d5e9-f17b-a93e-eae193630509", + "path": "z://CDG\\Big Hits Karaoke\\BHK028\\BHK028-01 - Corby, Matt - Brother.mp3", + "title": "Brother" + }, + { + "artist": "Little Richard", + "disabled": false, + "favorite": false, + "guid": "d6934a39-5b22-598c-8817-80120e3e91c9", + "path": "z://CDG\\Various\\Little Richard - Good Golly Miss Molly.mp3", + "title": "Good Golly Miss Molly" + }, + { + "artist": "The Flower Drum Song", + "disabled": false, + "favorite": false, + "guid": "c47b99ea-e8c4-66d2-6195-8aa44b8e3896", + "path": "z://MP4\\Let's Sing Karaoke\\Flower Drum Song, The - I Enjoy Being A Girl (Karaoke & Lyrics).mp4", + "title": "I Enjoy Being A Girl" + }, + { + "artist": "Dolly Parton", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "9567a6ed-b31c-eb8d-7cdd-f99e09a29f1d", + "path": "z://MP4\\KaraokeOnVEVO\\Dolly Parton - Here You Come Again.mp4", + "title": "Here You Come Again" + }, + { + "artist": "KSI ft. Tom Grennan", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "a53bc0b1-9eff-7710-fc02-b669bf657325", + "path": "z://MP4\\ZoomKaraokeOfficial\\KSI feat Tom Grennan - Not Over Yet.mp4", + "title": "Not Over Yet" + }, + { + "artist": "The Doobie Brothers", + "disabled": false, + "favorite": false, + "guid": "0876620f-5a41-af24-42b5-965e9d767819", + "path": "z://MP4\\Stingray Karaoke\\Minute By Minute The Doobie Brothers Karaoke with Lyrics.mp4", + "title": "Minute By Minute" + }, + { + "artist": "Ariana Grande ft. 
The Weeknd", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "4dcd11fa-3caf-5818-f3de-5b99cf722c80", + "path": "z://MP4\\Sing King Karaoke\\Ariana Grande & The Weeknd - Love Me Harder.mp4", + "title": "Love Me Harder" + } +] \ No newline at end of file diff --git a/data/songs_20.json b/data/songs_20.json new file mode 100644 index 0000000..bfdfbf9 --- /dev/null +++ b/data/songs_20.json @@ -0,0 +1,162 @@ +[ + { + "artist": "AC/DC", + "disabled": false, + "favorite": false, + "guid": "8946008c-7acc-d187-60e6-5286e55ad502", + "path": "z://MP4\\ACDC - Shot In The Dark (Karaoke Version).mp4", + "title": "Shot In The Dark" + }, + { + "artist": "Afrojack ft. Chris Brown", + "disabled": false, + "favorite": false, + "guid": "f9fd52fc-0c5a-01d5-caf5-4476e9172401", + "path": "z://MP4\\Afrojack and Chris Brown - As Your Friend.mp4", + "title": "As Your Friend" + }, + { + "artist": "Andy Grammer", + "disabled": false, + "favorite": false, + "guid": "ff27874e-68c3-2c0b-d302-a4bf36d2f76c", + "path": "z://MP4\\Andy Grammer - Honey I Am Good.mp4", + "title": "Honey I Am Good" + }, + { + "artist": "Avicii ft. Nicky Romero", + "disabled": false, + "favorite": false, + "guid": "9df89f14-a568-14a3-5081-77d6ee3d5cf8", + "path": "z://MP4\\Avicii and Nicky Romero - I Could Be The One.mp4", + "title": "I Could Be The One" + }, + { + "artist": "Bastille", + "disabled": false, + "favorite": false, + "guid": "71b6e7e2-3ad5-1b46-b4b5-d33b961451f7", + "path": "z://MP4\\Bastille - Flaws.mp4", + "title": "Flaws" + }, + { + "artist": "Big Sean ft. E40", + "disabled": false, + "favorite": false, + "guid": "3ff82151-be07-70e0-7e05-145dc0abec4a", + "path": "z://MP4\\Big Sean and E40 - I Don t F--k With You.mp4", + "title": "I Don t F--k With You" + }, + { + "artist": "Blake Shelton ft. Gwen Stefani", + "disabled": false, + "favorite": false, + "guid": "0443d7b1-daa0-6278-fe68-635a91ef7fcf", + "path": "z://MP4\\Blake Shelton feat. 
Gwen Stefani - Happy Anywhere (Karaoke Version).mp4", + "title": "Happy Anywhere" + }, + { + "artist": "Bradley Cooper", + "disabled": false, + "favorite": false, + "guid": "fa1a3926-96e0-3615-9785-9c24f39a937a", + "path": "z://MP4\\Bradley Cooper - Maybe It's Time (Karaoke Instrumental) A Star Is Born.mp4", + "title": "Maybe It's Time (A Star is Born)" + }, + { + "artist": "Bruno Mars ft. Cardi B", + "disabled": false, + "favorite": false, + "guid": "946a1077-ab9e-300c-3a72-b1e141e9706f", + "path": "z://MP4\\Bruno Mars ft. Cardi B - Finesse Remix (Karaoke Version).mp4", + "title": "Finesse Remix" + }, + { + "artist": "Cardi B ft. Bad Bunny, J Balvin", + "disabled": false, + "favorite": false, + "guid": "c295023e-e8db-edea-0d78-33125910fafd", + "path": "z://MP4\\Cardi B, Bad Bunny & J Balvin - I Like It Karaoke Lyrics Instrumental Cover Full Tracks.mp4", + "title": "I Like It" + }, + { + "artist": "Coldplay", + "disabled": false, + "favorite": false, + "guid": "3d0f477b-0c30-adac-57e8-155937c027ec", + "path": "z://MP4\\Coldplay - Orphans Karaoke Instrumetal Lyrics Cover Original Key A.mp4", + "title": "Orphans" + }, + { + "artist": "Juice WRLD & Marshmello", + "disabled": false, + "favorite": false, + "guid": "24bbbfd4-3ee5-0661-9f77-5a27ff024656", + "path": "z://MP4\\Come & Go - Juice WRLD & Marshmello (Karaoke Instrumental).mp4", + "title": "Come & Go" + }, + { + "artist": "Conor Maynard and Wiley", + "disabled": false, + "favorite": false, + "guid": "136d0364-c760-3faa-892c-e6553cf0d6e4", + "path": "z://MP4\\Conor Maynard and Wiley - Animal.mp4", + "title": "Animal" + }, + { + "artist": "Drake", + "disabled": false, + "favorite": false, + "guid": "125df572-009f-057c-b0d3-2403bb46519a", + "path": "z://MP4\\Drake - Nice For What (Karaoke).mp4", + "title": "Nice For What" + }, + { + "artist": "Eazy E", + "disabled": false, + "favorite": false, + "guid": "b129ac3c-45c0-dc8a-2f6b-a42c33a2f8bc", + "path": "z://MP4\\Eazy E - Gimme That Nut.mp4", + "title": "Gimme That 
Nut" + }, + { + "artist": "Eazy-E", + "disabled": false, + "favorite": false, + "guid": "a0a2bdc6-ddb2-16d0-5331-0251d47fc7a0", + "path": "z://MP4\\Eazy-E - Boyz n The Hood.mp4", + "title": "Boyz n The Hood" + }, + { + "artist": "Ellie Goulding", + "disabled": false, + "favorite": false, + "guid": "0693b62a-c07c-2fce-786a-463a7d85adf4", + "path": "z://MP4\\Ellie Goulding - Love Me Like You Do.mp4", + "title": "Love Me Like You Do" + }, + { + "artist": "Halsey ft. Machine Gun Kelly", + "disabled": false, + "favorite": false, + "guid": "705a1f72-ee7d-9234-6d62-8bed0309e6f9", + "path": "z://MP4\\Forget Me Too - Halsey Machine Gun Kelly (Karaoke Version).mp4", + "title": "Forget Me Too" + }, + { + "artist": "Frank Sinatra", + "disabled": false, + "favorite": false, + "guid": "7b458762-e33b-98c4-b5c3-ac60f397c075", + "path": "z://MP4\\Frank Sinatra - New York, New York.mp4", + "title": "New York, New York" + }, + { + "artist": "Garth Brooks", + "disabled": false, + "favorite": false, + "guid": "953d8eae-4be3-2b08-8695-28afaa4bd468", + "path": "z://MP4\\Garth Brooks - Ain t Goin Down (Til The Sun Comes Up).mp4", + "title": "Ain t Goin Down (Til The Sun Comes Up)" + } +] \ No newline at end of file diff --git a/data/songs_5.json b/data/songs_5.json new file mode 100644 index 0000000..7ce8dee --- /dev/null +++ b/data/songs_5.json @@ -0,0 +1,50 @@ +[ + { + "artist": "Eazy-E", + "disabled": false, + "favorite": false, + "guid": "a0a2bdc6-ddb2-16d0-5331-0251d47fc7a0", + "path": "z://MP4\\Eazy-E - Boyz n The Hood.mp4", + "title": "Boyz n The Hood" + }, + { + "artist": "21 Savage ft. Offset w~ Metro Boomin & Travis Scott", + "disabled": false, + "favorite": false, + "guid": "4ea9624f-e7b6-4db0-839c-e9e163c452ce", + "path": "z://MP4\\Karaoke Sing Sing\\21 Savage ft Offset w~ Metro Boomin & Travis Scott - Ghostface Killers (Karaoke Version).mp4", + "title": "Ghostface Killers" + }, + { + "artist": "Afrojack ft. 
Chris Brown", + "disabled": false, + "favorite": false, + "guid": "f9fd52fc-0c5a-01d5-caf5-4476e9172401", + "path": "z://MP4\\Afrojack and Chris Brown - As Your Friend.mp4", + "title": "As Your Friend" + }, + { + "artist": "Andy Grammer", + "disabled": false, + "favorite": false, + "guid": "ff27874e-68c3-2c0b-d302-a4bf36d2f76c", + "path": "z://MP4\\Andy Grammer - Honey I Am Good.mp4", + "title": "Honey I Am Good" + }, + { + "artist": "Avicii ft. Nicky Romero", + "disabled": false, + "favorite": false, + "guid": "9df89f14-a568-14a3-5081-77d6ee3d5cf8", + "path": "z://MP4\\Avicii and Nicky Romero - I Could Be The One.mp4", + "title": "I Could Be The One" + }, + { + "artist": "Bastille", + "disabled": false, + "favorite": false, + "guid": "71b6e7e2-3ad5-1b46-b4b5-d33b961451f7", + "path": "z://MP4\\Bastille - Flaws.mp4", + "title": "Flaws" + } +] \ No newline at end of file diff --git a/data/songs_blink182.json b/data/songs_blink182.json new file mode 100644 index 0000000..2de9562 --- /dev/null +++ b/data/songs_blink182.json @@ -0,0 +1,66 @@ +[ + { + "artist": "Blink-182", + "disabled": false, + "favorite": false, + "guid": "49e79b7b-a610-a8b2-cee3-a4580ba5bb1e", + "path": "z://MP4\\KaraFun Karaoke\\Karaoke I Miss You - Blink-182.mp4", + "title": "I Miss You" + }, + { + "artist": "Blink-182", + "disabled": false, + "favorite": false, + "guid": "cb394951-2094-513c-c0a5-663c01bfd5ed", + "path": "z://MP4\\KaraFun Karaoke\\Karaoke What s My Age Again - Blink-182.mp4", + "title": "What s My Age Again" + }, + { + "artist": "Blink-182", + "disabled": false, + "favorite": false, + "guid": "5d09cee0-40c2-d202-7c16-cbea8a4e892a", + "path": "z://MP4\\KaraFun Karaoke\\Karaoke First Date - Blink-182.mp4", + "title": "First Date" + }, + { + "artist": "Blink-182", + "disabled": false, + "favorite": false, + "guid": "147425-unknown", + "path": "z://MP4\\Sing King Karaoke\\blink-182 - All the Small Things.mp4", + "title": "All the Small Things" + }, + { + "artist": "Blink-182", + 
"disabled": false, + "favorite": false, + "guid": "126671-unknown", + "path": "z://MP4\\Let's Sing Karaoke\\Blink-182 - The Rock Show.mp4", + "title": "The Rock Show" + }, + { + "artist": "Blink-182", + "disabled": false, + "favorite": false, + "guid": "307387-unknown", + "path": "z://CDG\\SBI\\SBI-05\\Blink-182 - Reckless Abandon.mp3", + "title": "Reckless Abandon" + }, + { + "artist": "Blink-182", + "disabled": false, + "favorite": false, + "guid": "18864-unknown", + "path": "z://MP4\\KaraFun Karaoke\\Karaoke Dammit (Growing Up) - Blink-182.mp4", + "title": "Dammit (Growing Up)" + }, + { + "artist": "Blink-182", + "disabled": false, + "favorite": false, + "guid": "22728-unknown", + "path": "z://MP4\\KaraFun Karaoke\\Karaoke Feeling This - Blink-182.mp4", + "title": "Feeling This" + } +] \ No newline at end of file diff --git a/data/songs_edge_cases.json b/data/songs_edge_cases.json new file mode 100644 index 0000000..7035a55 --- /dev/null +++ b/data/songs_edge_cases.json @@ -0,0 +1,260 @@ +[ + { + "artist": "Pitbull ft. Ne-Yo, Afrojack & Nayer", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "17685554-1e6d-09be-1888-d16cad296928", + "path": "z://MP4\\Sing King Karaoke\\Pitbull Featuring Ne-Yo, Afrojack & Nayer - Give Me Everything.mp4", + "title": "Give Me Everything" + }, + { + "artist": "Aretha Franklin ft. G. Michael", + "disabled": false, + "favorite": false, + "genre": "Karaoke", + "guid": "c55f8a87-b921-8239-7ea5-a7a7b8d27a5b", + "path": "z://MP4\\KaraokeOnVEVO\\Aretha Franklin & G. Michael - I Knew You Were Waiting For Me.mp4", + "title": "I Knew You Were Waiting For Me" + }, + { + "artist": "P!nk", + "disabled": false, + "favorite": false, + "guid": "test-1", + "path": "test/path/pink.mp4", + "title": "Just Give Me a Reason" + }, + { + "artist": "Panic! 
At The Disco", + "disabled": false, + "favorite": false, + "guid": "test-2", + "path": "test/path/panic.mp4", + "title": "High Hopes" + }, + { + "artist": "3OH!3", + "disabled": false, + "favorite": false, + "guid": "test-3", + "path": "test/path/3oh3.mp4", + "title": "Don't Trust Me" + }, + { + "artist": "a-ha", + "disabled": false, + "favorite": false, + "guid": "test-4", + "path": "test/path/aha.mp4", + "title": "Take On Me" + }, + { + "artist": "Ne-Yo", + "disabled": false, + "favorite": false, + "guid": "test-5", + "path": "test/path/neyo.mp4", + "title": "So Sick" + }, + { + "artist": "The B-52s", + "disabled": false, + "favorite": false, + "guid": "test-6", + "path": "test/path/b52s.mp4", + "title": "Love Shack" + }, + { + "artist": "Salt-N-Pepa", + "disabled": false, + "favorite": false, + "guid": "test-7", + "path": "test/path/saltnpepa.mp4", + "title": "Push It" + }, + { + "artist": "Sir Mix-A-Lot", + "disabled": false, + "favorite": false, + "guid": "test-8", + "path": "test/path/sirmixalot.mp4", + "title": "Baby Got Back" + }, + { + "artist": "O-Town", + "disabled": false, + "favorite": false, + "guid": "test-9", + "path": "test/path/otown.mp4", + "title": "All or Nothing" + }, + { + "artist": "Jay-Z", + "disabled": false, + "favorite": false, + "guid": "test-10", + "path": "test/path/jayz.mp4", + "title": "Empire State of Mind" + }, + { + "artist": "G-Eazy & Halsey", + "disabled": false, + "favorite": false, + "guid": "test-11", + "path": "test/path/geazy.mp4", + "title": "Him & I" + }, + { + "artist": "Bachman-Turner Overdrive", + "disabled": false, + "favorite": false, + "guid": "test-12", + "path": "test/path/bto.mp4", + "title": "You Ain't Seen Nothing Yet" + }, + { + "artist": "Maroon 5", + "disabled": false, + "favorite": false, + "guid": "test-13", + "path": "test/path/maroon5.mp4", + "title": "Sugar" + }, + { + "artist": "U2", + "disabled": false, + "favorite": false, + "guid": "test-14", + "path": "test/path/u2.mp4", + "title": "With or 
Without You" + }, + { + "artist": "3 Doors Down", + "disabled": false, + "favorite": false, + "guid": "test-15", + "path": "test/path/3doors.mp4", + "title": "Kryptonite" + }, + { + "artist": "The Jackson 5", + "disabled": false, + "favorite": false, + "guid": "test-16", + "path": "test/path/jackson5.mp4", + "title": "I Want You Back" + }, + { + "artist": "The 1975", + "disabled": false, + "favorite": false, + "guid": "test-17", + "path": "test/path/1975.mp4", + "title": "Chocolate" + }, + { + "artist": "S Club 7", + "disabled": false, + "favorite": false, + "guid": "test-18", + "path": "test/path/sclub7.mp4", + "title": "Never Had a Dream Come True" + }, + { + "artist": "Sum 41", + "disabled": false, + "favorite": false, + "guid": "test-19", + "path": "test/path/sum41.mp4", + "title": "In Too Deep" + }, + { + "artist": "98 Degrees", + "disabled": false, + "favorite": false, + "guid": "test-20", + "path": "test/path/98degrees.mp4", + "title": "Because of You" + }, + { + "artist": "Shawn Mendes ft. Camila Cabello", + "disabled": false, + "favorite": false, + "guid": "test-21", + "path": "test/path/shawncamila.mp4", + "title": "Señorita" + }, + { + "artist": "Rihanna ft. Drake", + "disabled": false, + "favorite": false, + "guid": "test-22", + "path": "test/path/rihannadrake.mp4", + "title": "What's My Name" + }, + { + "artist": "Calvin Harris ft. Ellie Goulding", + "disabled": false, + "favorite": false, + "guid": "test-23", + "path": "test/path/calvinellie.mp4", + "title": "Outside" + }, + { + "artist": "Wiz Khalifa ft. Charlie Puth", + "disabled": false, + "favorite": false, + "guid": "test-24", + "path": "test/path/wizcharlie.mp4", + "title": "See You Again" + }, + { + "artist": "The Weeknd ft. 
Daft Punk", + "disabled": false, + "favorite": false, + "guid": "test-25", + "path": "test/path/weeknddaftpunk.mp4", + "title": "Starboy" + }, + { + "artist": "MAGIC!", + "disabled": false, + "favorite": false, + "guid": "test-26", + "path": "test/path/magic.mp4", + "title": "Rude" + }, + { + "artist": "Tony! Toni! Tone!", + "disabled": false, + "favorite": false, + "guid": "test-27", + "path": "test/path/tonytonitone.mp4", + "title": "If I Had No Loot" + }, + { + "artist": "Little Mix", + "disabled": false, + "favorite": false, + "guid": "test-28", + "path": "test/path/littlemix.mp4", + "title": "Black Magic" + }, + { + "artist": "Anne-Marie", + "disabled": false, + "favorite": false, + "guid": "test-29", + "path": "test/path/annemarie.mp4", + "title": "2002" + }, + { + "artist": "Sophie Ellis-Bextor", + "disabled": false, + "favorite": false, + "guid": "test-30", + "path": "test/path/sophieellis.mp4", + "title": "Murder on the Dancefloor" + } +] \ No newline at end of file diff --git a/data/test_imported_artists.json b/data/test_imported_artists.json deleted file mode 100644 index ec06292..0000000 --- a/data/test_imported_artists.json +++ /dev/null @@ -1,26 +0,0 @@ -[ - { - "artist": "Brainman", - "title": "Test Song", - "disabled": false, - "favorite": false, - "guid": "test-guid-1", - "path": "test/path/1" - }, - { - "artist": "Far Too Jones", - "title": "Another Test", - "disabled": false, - "favorite": false, - "guid": "test-guid-2", - "path": "test/path/2" - }, - { - "artist": "Buddy Merrill", - "title": "Test Recording", - "disabled": false, - "favorite": false, - "guid": "test-guid-3", - "path": "test/path/3" - } -] \ No newline at end of file diff --git a/data/test_not_found.json b/data/test_not_found.json new file mode 100644 index 0000000..313aff6 --- /dev/null +++ b/data/test_not_found.json @@ -0,0 +1,42 @@ +[ + { + "artist": "Lavato, Demi & Joe Jonas", + "title": "This Is Me", + "guid": "7c1b42d7-14e4-2f6d-40ba-fa41a217c0ab" + }, + { + "artist": 
"Drake", + "title": "Passionfruit", + "guid": "2db9fd74-3353-0fb2-a289-666ad6e1020a" + }, + { + "artist": "Coldplay", + "title": "Paradise", + "guid": "7820a2af-e4fa-f3a2-d070-46ad73e08ace" + }, + { + "artist": "Chris Brown Featuring T-Pain", + "title": "Kiss Kiss", + "guid": "b0329fd3-dadd-bee1-8807-ae090b3a1b64" + }, + { + "artist": "Destiny's Child", + "title": "Survivor", + "guid": "24f3fb48-ea83-12c1-f998-cfbb917ab927" + }, + { + "artist": "Nicki Minaj Ft.drake", + "title": "Moment 4 Life (clean)", + "guid": "49a1a33d-799b-bbfd-1875-3c551424f75a" + }, + { + "artist": "Corby, Matt", + "title": "Brother", + "guid": "2f699149-d5e9-f17b-a93e-eae193630509" + }, + { + "artist": "The Flower Drum Song", + "title": "I Enjoy Being A Girl", + "guid": "c47b99ea-e8c4-66d2-6195-8aa44b8e3896" + } +] \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..cb95ce3 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,22 @@ +services: + musicbrainz-cleaner: + build: . 
+ container_name: musicbrainz-cleaner + networks: + - musicbrainz-network + volumes: + - ./data:/app/data + - ./output:/app/output + - .:/app + environment: + - DB_HOST=db + - DB_PORT=5432 + - DB_NAME=musicbrainz_db + - DB_USER=musicbrainz + - DB_PASSWORD=musicbrainz + command: ["python3", "simple_test.py"] + +networks: + musicbrainz-network: + external: true + name: musicbrainz-docker_default \ No newline at end of file diff --git a/src/api/database.py b/src/api/database.py index d551935..bbed99e 100644 --- a/src/api/database.py +++ b/src/api/database.py @@ -32,8 +32,9 @@ class MusicBrainzDatabase: def connect(self) -> bool: """Establish connection to the MusicBrainz database.""" try: + # Use the direct connection method that works self.connection = psycopg2.connect( - host=self.host, + host='172.18.0.2', # Docker container IP that works port=self.port, database=self.database, user=self.user, @@ -43,6 +44,7 @@ class MusicBrainzDatabase: self.cursor = self.connection.cursor(cursor_factory=psycopg2.extras.RealDictCursor) print(SUCCESS_MESSAGES['db_connected']) return True + except Exception as e: print(ERROR_MESSAGES['db_connection_failed']) print(f"Connection error: {e}") @@ -73,111 +75,367 @@ class MusicBrainzDatabase: return None try: - # Get all artists from database + # Clean artist name + clean_name = artist_name.strip() + + # Handle collaborations - extract main artist + if 'ft.' in clean_name.lower(): + main_artist = clean_name.split('ft.')[0].strip() + elif 'feat.' 
in clean_name.lower(): + main_artist = clean_name.split('feat.')[0].strip() + else: + main_artist = clean_name + + # Try exact match first - but check for multiple matches + query = "SELECT name, gid FROM artist WHERE name = %s" + self.cursor.execute(query, (main_artist,)) + results = self.cursor.fetchall() + + if results: + if len(results) == 1: + # Only one exact match, use it + result = results[0] + print(SUCCESS_MESSAGES['fuzzy_match_found'].format( + original=artist_name, + matched=result['name'], + score=1.0 + )) + return (result['name'], result['gid'], 1.0) + else: + # Multiple exact matches - choose the one with most recordings + print(f"🔍 Found {len(results)} exact matches for '{main_artist}', checking recording counts...") + artists_with_counts = [] + for artist in results: + query = """ + SELECT COUNT(r.id) as recording_count + FROM artist a + JOIN artist_credit_name acn ON a.id = acn.artist + JOIN artist_credit ac ON acn.artist_credit = ac.id + JOIN recording r ON ac.id = r.artist_credit + WHERE a.gid = %s + """ + self.cursor.execute(query, (artist['gid'],)) + count_result = self.cursor.fetchone() + recording_count = count_result['recording_count'] if count_result else 0 + artists_with_counts.append({ + 'name': artist['name'], + 'gid': artist['gid'], + 'recording_count': recording_count + }) + print(f" 📊 {artist['name']} ({artist['gid'][:8]}...): {recording_count} recordings") + + # Choose the artist with the most recordings + best_artist = max(artists_with_counts, key=lambda x: x['recording_count']) + print(f" 🎯 Selected: {best_artist['name']} with {best_artist['recording_count']} recordings") + + print(SUCCESS_MESSAGES['fuzzy_match_found'].format( + original=artist_name, + matched=best_artist['name'], + score=1.0 + )) + return (best_artist['name'], best_artist['gid'], 1.0) + + # Try case-insensitive match + query = "SELECT name, gid FROM artist WHERE LOWER(name) = LOWER(%s) LIMIT 1" + self.cursor.execute(query, (main_artist,)) + result = 
self.cursor.fetchone() + if result: + print(SUCCESS_MESSAGES['fuzzy_match_found'].format( + original=artist_name, + matched=result['name'], + score=0.99 + )) + return (result['name'], result['gid'], 0.99) + + # Try matching with different dash characters + # Replace regular dash with Unicode dash and vice versa + dash_variants = [ + main_artist.replace('-', '‐'), # Regular dash to Unicode dash + main_artist.replace('‐', '-'), # Unicode dash to regular dash + ] + + for variant in dash_variants: + if variant != main_artist: + query = "SELECT name, gid FROM artist WHERE name = %s LIMIT 1" + self.cursor.execute(query, (variant,)) + result = self.cursor.fetchone() + if result: + print(SUCCESS_MESSAGES['fuzzy_match_found'].format( + original=artist_name, + matched=result['name'], + score=0.98 + )) + return (result['name'], result['gid'], 0.98) + + # Try fuzzy search + # Use more specific patterns to avoid false matches + # Also try different dash characters and name variations + dash_variants = [ + main_artist, + main_artist.replace('-', '‐'), # Regular dash to Unicode dash + main_artist.replace('‐', '-'), # Unicode dash to regular dash + ] + + # Add name variations for common cases like "S Club 7" -> "S Club" + name_variants = [] + for variant in dash_variants: + name_variants.append(variant) + # Handle cases like "S Club 7" -> "S Club" + if ' ' in variant: + parts = variant.split() + if len(parts) > 1: + # Try without the last part if it's a number (e.g., "S Club 7" -> "S Club") + if parts[-1].isdigit(): + name_variants.append(' '.join(parts[:-1])) + + # First try exact matches with name variations + for variant in name_variants: + # Try exact match in artist names + query = "SELECT name, gid FROM artist WHERE name = %s LIMIT 1" + self.cursor.execute(query, (variant,)) + result = self.cursor.fetchone() + if result: + print(SUCCESS_MESSAGES['fuzzy_match_found'].format( + original=artist_name, + matched=result['name'], + score=0.95 + )) + return (result['name'], 
result['gid'], 0.95) + + # Try exact match in artist aliases query = """ - SELECT name, gid as mbid - FROM artist - WHERE name IS NOT NULL - AND name != '' - ORDER BY name + SELECT a.name, a.gid + FROM artist a + JOIN artist_alias aa ON a.id = aa.artist + WHERE aa.name = %s + LIMIT 1 """ - self.cursor.execute(query) - artists = self.cursor.fetchall() + self.cursor.execute(query, (variant,)) + result = self.cursor.fetchone() + if result: + print(SUCCESS_MESSAGES['fuzzy_match_found'].format( + original=artist_name, + matched=result['name'], + score=0.94 + )) + return (result['name'], result['gid'], 0.94) + + # Try exact match in sort_name field + query = "SELECT name, gid FROM artist WHERE sort_name = %s LIMIT 1" + self.cursor.execute(query, (variant,)) + result = self.cursor.fetchone() + if result: + print(SUCCESS_MESSAGES['fuzzy_match_found'].format( + original=artist_name, + matched=result['name'], + score=0.93 + )) + return (result['name'], result['gid'], 0.93) + + # Then try starts-with patterns for better precision + patterns = [] + for variant in name_variants: + patterns.extend([ + f"{variant}%", # Starts with (most precise) + f" {variant} ", # Word boundary + f"%{variant}%", # Contains (least precise) + ]) + + all_artists = [] + for pattern in patterns: + # Search in artist names + query = "SELECT name, gid FROM artist WHERE name ILIKE %s LIMIT 20" + self.cursor.execute(query, (pattern,)) + artists = self.cursor.fetchall() + all_artists.extend(artists) + + # Search in artist aliases + query = """ + SELECT a.name, a.gid + FROM artist a + JOIN artist_alias aa ON a.id = aa.artist + WHERE aa.name ILIKE %s + LIMIT 20 + """ + self.cursor.execute(query, (pattern,)) + artists = self.cursor.fetchall() + all_artists.extend(artists) + + # Search in sort_name field + query = "SELECT name, gid FROM artist WHERE sort_name ILIKE %s LIMIT 20" + self.cursor.execute(query, (pattern,)) + artists = self.cursor.fetchall() + all_artists.extend(artists) + + # Remove duplicates 
based on MBID + seen_mbids = set() + unique_artists = [] + for artist in all_artists: + if artist['gid'] not in seen_mbids: + seen_mbids.add(artist['gid']) + unique_artists.append(artist) + + # Get recording counts for all artists to prioritize famous ones + artists_with_counts = [] + print(f"🔍 Found {len(unique_artists)} unique artists, checking recording counts...") + for artist in unique_artists: + query = """ + SELECT COUNT(r.id) as recording_count + FROM artist a + JOIN artist_credit_name acn ON a.id = acn.artist + JOIN artist_credit ac ON acn.artist_credit = ac.id + JOIN recording r ON ac.id = r.artist_credit + WHERE a.gid = %s + """ + self.cursor.execute(query, (artist['gid'],)) + result = self.cursor.fetchone() + recording_count = result['recording_count'] if result else 0 + artists_with_counts.append({ + 'name': artist['name'], + 'gid': artist['gid'], + 'recording_count': recording_count + }) + print(f" 📊 {artist['name']} ({artist['gid'][:8]}...): {recording_count} recordings") best_match = None best_score = 0 + best_recording_count = 0 - for artist in artists: - # Use different fuzzy matching algorithms - ratio = fuzz.ratio(artist_name.lower(), artist['name'].lower()) - partial_ratio = fuzz.partial_ratio(artist_name.lower(), artist['name'].lower()) - token_sort_ratio = fuzz.token_sort_ratio(artist_name.lower(), artist['name'].lower()) + for artist in artists_with_counts: + score = fuzz.ratio(main_artist.lower(), artist['name'].lower()) - # Use the best score from all algorithms - score = max(ratio, partial_ratio, token_sort_ratio) / 100.0 + # Only apply strict filtering for very obvious false matches + # like "Sleazy-E" vs "Eazy-E" where one is clearly a substring of the other + if (main_artist.lower() in artist['name'].lower() and + artist['name'].lower() != main_artist.lower() and + len(artist['name']) > len(main_artist) + 2): + # This is likely a false match, require very high score + if score < 90: + continue - if score > best_score and score >= 
ARTIST_SIMILARITY_THRESHOLD: + # Prioritize artists with more recordings (famous artists) + # If scores are close (within 5 points), prefer the one with more recordings + if (score > best_score or + (score >= best_score - 5 and artist['recording_count'] > best_recording_count * 2)): best_score = score - best_match = (artist['name'], artist['mbid'], score) + best_match = artist + best_recording_count = artist['recording_count'] + print(f" 🎯 New best match: {artist['name']} (score: {score}, recordings: {artist['recording_count']})") - if best_match: + if best_score >= 80: print(SUCCESS_MESSAGES['fuzzy_match_found'].format( original=artist_name, - matched=best_match[0], - score=best_match[2] + matched=best_match['name'], + score=best_score / 100.0 )) - return best_match - else: - print(ERROR_MESSAGES['no_fuzzy_match'].format(query=artist_name)) - return None - + return (best_match['name'], best_match['gid'], best_score / 100.0) + + return None + except Exception as e: - print(ERROR_MESSAGES['db_query_failed'].format(error=str(e))) + print(f"Error in fuzzy_search_artist: {e}") return None def fuzzy_search_recording(self, title: str, artist_mbid: Optional[str] = None) -> Optional[Tuple[str, str, float]]: """ - Fuzzy search for recording by title and optionally artist. - Returns (recording_title, mbid, similarity_score) or None. + Fuzzy search for recording by title and optionally artist MBID. + Returns (recording_name, mbid, similarity_score) or None. 
""" if not self.connection: return None try: - # Build query based on whether we have artist MBID + # Clean title + clean_title = title.strip() + + # Remove karaoke patterns + karaoke_patterns = [ + r'\s*\(Karaoke Version\)', + r'\s*\(Karaoke\)', + r'\s*\(Instrumental\)', + r'\s*\(Backing Track\)', + ] + import re + for pattern in karaoke_patterns: + clean_title = re.sub(pattern, '', clean_title, flags=re.IGNORECASE) + clean_title = clean_title.strip() + if artist_mbid: + # Try to find recording by artist and title query = """ - SELECT r.name as title, r.gid as mbid + SELECT r.name, r.gid FROM recording r JOIN artist_credit_name acn ON r.artist_credit = acn.artist_credit JOIN artist a ON acn.artist = a.id - WHERE r.name IS NOT NULL - AND r.name != '' - AND a.gid = %s - ORDER BY r.name + WHERE a.gid = %s AND r.name ILIKE %s + LIMIT 1 + """ + self.cursor.execute(query, (artist_mbid, f"%{clean_title}%")) + result = self.cursor.fetchone() + + if result: + return (result['name'], result['gid'], 1.0) + + # Try fuzzy search + query = """ + SELECT r.name, r.gid + FROM recording r + JOIN artist_credit_name acn ON r.artist_credit = acn.artist_credit + JOIN artist a ON acn.artist = a.id + WHERE a.gid = %s + LIMIT 100 """ self.cursor.execute(query, (artist_mbid,)) + recordings = self.cursor.fetchall() + + best_match = None + best_score = 0 + + for recording in recordings: + score = fuzz.ratio(clean_title.lower(), recording['name'].lower()) + if score > best_score: + best_score = score + best_match = recording + + if best_score >= 80: + return (best_match['name'], best_match['gid'], best_score / 100.0) else: - query = """ - SELECT name as title, gid as mbid - FROM recording - WHERE name IS NOT NULL - AND name != '' - ORDER BY name - """ - self.cursor.execute(query) - - recordings = self.cursor.fetchall() - - best_match = None - best_score = 0 - - for recording in recordings: - # Use different fuzzy matching algorithms - ratio = fuzz.ratio(title.lower(), 
recording['title'].lower()) - partial_ratio = fuzz.partial_ratio(title.lower(), recording['title'].lower()) - token_sort_ratio = fuzz.token_sort_ratio(title.lower(), recording['title'].lower()) + # No artist constraint - search by title only, but be more specific + # Try exact match first + query = "SELECT name, gid FROM recording WHERE name = %s LIMIT 1" + self.cursor.execute(query, (clean_title,)) + result = self.cursor.fetchone() + if result: + return (result['name'], result['gid'], 1.0) - # Use the best score from all algorithms - score = max(ratio, partial_ratio, token_sort_ratio) / 100.0 + # Try case-insensitive match + query = "SELECT name, gid FROM recording WHERE LOWER(name) = LOWER(%s) LIMIT 1" + self.cursor.execute(query, (clean_title,)) + result = self.cursor.fetchone() + if result: + return (result['name'], result['gid'], 0.99) - if score > best_score and score >= TITLE_SIMILARITY_THRESHOLD: - best_score = score - best_match = (recording['title'], recording['mbid'], score) + # Try partial match + query = "SELECT name, gid FROM recording WHERE name ILIKE %s LIMIT 50" + self.cursor.execute(query, (f"%{clean_title}%",)) + recordings = self.cursor.fetchall() + + best_match = None + best_score = 0 + + for recording in recordings: + score = fuzz.ratio(clean_title.lower(), recording['name'].lower()) + if score > best_score: + best_score = score + best_match = recording + + if best_score >= 80: + return (best_match['name'], best_match['gid'], best_score / 100.0) + + return None - if best_match: - print(SUCCESS_MESSAGES['fuzzy_match_found'].format( - original=title, - matched=best_match[0], - score=best_match[2] - )) - return best_match - else: - print(ERROR_MESSAGES['no_fuzzy_match'].format(query=title)) - return None - except Exception as e: - print(ERROR_MESSAGES['db_query_failed'].format(error=str(e))) + print(f"Error in fuzzy_search_recording: {e}") return None def get_artist_info(self, mbid: str) -> Optional[Dict[str, Any]]: @@ -187,7 +445,7 @@ class 
MusicBrainzDatabase: try: query = """ - SELECT name, gid as mbid, sort_name, type, gender, country + SELECT name, gid as mbid, sort_name FROM artist WHERE gid = %s """ @@ -203,42 +461,50 @@ class MusicBrainzDatabase: return None def get_recording_info(self, mbid: str) -> Optional[Dict[str, Any]]: - """Get detailed recording information by MBID.""" + """Get recording information by MBID.""" if not self.connection: return None try: query = """ - SELECT r.name as title, r.gid as mbid, r.length, r.comment, - acn.name as artist_name, acn.join_phrase + SELECT r.name as title, r.gid as mbid, ac.id as artist_credit_id FROM recording r - JOIN artist_credit_name acn ON r.artist_credit = acn.artist_credit + JOIN artist_credit ac ON r.artist_credit = ac.id WHERE r.gid = %s - ORDER BY acn.position + LIMIT 1 """ self.cursor.execute(query, (mbid,)) - recordings = self.cursor.fetchall() + result = self.cursor.fetchone() - if recordings: - # Build artist credit string - artist_credit = [] - for rec in recordings: - artist_credit.append(rec['artist_name']) - if rec['join_phrase']: - artist_credit.append(rec['join_phrase']) + if result: + # Get artist credit information + artist_query = """ + SELECT a.name, acn.join_phrase, acn.position + FROM artist_credit_name acn + JOIN artist a ON acn.artist = a.id + WHERE acn.artist_credit = %s + ORDER BY acn.position + """ + self.cursor.execute(artist_query, (result['artist_credit_id'],)) + artists = self.cursor.fetchall() - result = { - 'title': recordings[0]['title'], - 'mbid': recordings[0]['mbid'], - 'length': recordings[0]['length'], - 'comment': recordings[0]['comment'], - 'artist_credit': ''.join(artist_credit) + # Build the artist string + artist_string = "" + for artist in artists: + artist_string += artist['name'] + if artist['join_phrase']: + artist_string += artist['join_phrase'] + + return { + 'title': result['title'], + 'mbid': result['mbid'], + 'artist_credit': artist_string } - return result + return None except Exception as e: 
- print(ERROR_MESSAGES['db_query_failed'].format(error=str(e))) + print(f"Error in get_recording_info: {e}") return None def test_connection(self) -> bool: @@ -251,4 +517,184 @@ class MusicBrainzDatabase: return True except Exception as e: print(f"Database connection test failed: {e}") - return False \ No newline at end of file + return False + + def find_artist_credit(self, artist_name: str, title: str) -> Optional[Tuple[str, str, str]]: + """ + Find artist credit for collaboration. + Returns (artist_credit_id, artist_string, recording_mbid) or None. + """ + if not self.connection: + return None + + try: + # Clean artist name + clean_name = artist_name.strip() + + # Parse complex collaboration + main_artist, collaborators = self._parse_complex_collaboration(clean_name) + collaboration = len(collaborators) > 0 + + # Clean title + clean_title = title.strip() + karaoke_patterns = [ + r'\s*\(Karaoke Version\)', + r'\s*\(Karaoke\)', + r'\s*\(Instrumental\)', + r'\s*\(Backing Track\)', + ] + import re + for pattern in karaoke_patterns: + clean_title = re.sub(pattern, '', clean_title, flags=re.IGNORECASE) + clean_title = clean_title.strip() + + # Try to find recording with artist credit + query = """ + SELECT ac.id as artist_credit_id, r.gid as recording_mbid + FROM recording r + JOIN artist_credit ac ON r.artist_credit = ac.id + JOIN artist_credit_name acn ON ac.id = acn.artist_credit + JOIN artist a ON acn.artist = a.id + WHERE a.name = %s AND r.name ILIKE %s + LIMIT 1 + """ + self.cursor.execute(query, (main_artist, f"%{clean_title}%")) + result = self.cursor.fetchone() + + if result: + # Get the full artist string for this credit + artist_query = """ + SELECT a.name, acn.join_phrase, acn.position + FROM artist_credit_name acn + JOIN artist a ON acn.artist = a.id + WHERE acn.artist_credit = %s + ORDER BY acn.position + """ + self.cursor.execute(artist_query, (result['artist_credit_id'],)) + artists = self.cursor.fetchall() + + # Build the artist string + artist_string 
= "" + for artist in artists: + artist_string += artist['name'] + if artist['join_phrase']: + artist_string += artist['join_phrase'] + + return (str(result['artist_credit_id']), artist_string, result['recording_mbid']) + + return None + + except Exception as e: + print(f"Error in find_artist_credit: {e}") + return None + + def _parse_complex_collaboration(self, artist_string: str) -> Tuple[str, List[str]]: + """ + Parse complex collaboration strings like "Pitbull ft. Ne-Yo, Afrojack & Nayer" + Returns: (main_artist, [collaborators]) + """ + import re + + # Primary collaboration indicators + primary_patterns = ['ft.', 'feat.', 'featuring'] + + # Secondary collaboration indicators (need more careful handling) + secondary_patterns = ['&', 'and'] + + # Check if this is a collaboration + is_collaboration = False + split_pattern = None + + for pattern in primary_patterns: + if pattern.lower() in artist_string.lower(): + is_collaboration = True + # Find the actual pattern in the original string (case-insensitive) + import re + match = re.search(re.escape(pattern), artist_string, re.IGNORECASE) + if match: + split_pattern = match.group(0) + else: + split_pattern = pattern + break + + # If no primary collaboration found, check secondary patterns + if not is_collaboration: + for pattern in secondary_patterns: + if pattern.lower() in artist_string.lower(): + # For secondary patterns, be more careful + # Check if this looks like a band name vs collaboration + parts = artist_string.split(pattern) + if len(parts) == 2: + part1 = parts[0].strip() + part2 = parts[1].strip() + + # Check if this looks like a band name vs collaboration + # Band names typically have simple, short parts + # Collaborations often have more complex artist names + part1_words = len(part1.split()) + part2_words = len(part2.split()) + + # Check if this looks like a band name vs collaboration + # Band names typically have simple, short parts + # Collaborations often have more complex artist names + + # If one 
part has a comma, it's likely a collaboration + if ',' in part1 or ',' in part2: + is_collaboration = True + split_pattern = pattern + break + + # For "&" separators, be more permissive + # Most "&" separators in music are collaborations, not band names + # Only treat very obvious band names as non-collaborations + if part1_words == 1 and part2_words == 1: + # Check for very obvious band name patterns + obvious_band_names = ['simon & garfunkel', 'hall & oates', 'brooks & dunn'] + if artist_string.lower() in obvious_band_names: + # Likely a band name, skip + continue + + # Likely a collaboration + is_collaboration = True + split_pattern = pattern + break + + if not is_collaboration: + return (artist_string, []) + + # Split on the pattern + parts = artist_string.split(split_pattern) + if len(parts) < 2: + return (artist_string, []) + + main_artist = parts[0].strip() + collaborators_string = split_pattern.join(parts[1:]).strip() + + # Now parse the collaborators string which might have multiple separators + collaborators = self._parse_collaborators(collaborators_string) + + return (main_artist, collaborators) + + def _parse_collaborators(self, collaborators_string: str) -> List[str]: + """ + Parse a string like "Ne-Yo, Afrojack & Nayer" into individual artists + """ + import re + + if not collaborators_string: + return [] + + # Split on common separators: comma, ampersand, "and" + # Use regex to handle multiple separators + separators = r'[,&]|\band\b' + + # Split and clean up each part + parts = re.split(separators, collaborators_string, flags=re.IGNORECASE) + + collaborators = [] + for part in parts: + part = part.strip() + if part: # Skip empty parts + collaborators.append(part) + + return collaborators \ No newline at end of file diff --git a/src/cli/main.py b/src/cli/main.py index 05c7d0a..7ae79d6 100644 --- a/src/cli/main.py +++ b/src/cli/main.py @@ -9,7 +9,7 @@ import json import time import re from pathlib import Path -from typing import Dict, Optional, Any 
+from typing import Dict, Optional, Any, Tuple, List # Import constants from ..config.constants import ( @@ -44,7 +44,7 @@ class MusicBrainzCleaner: self.use_database = False # Initialize API client (fallback method) - if not use_database: + if not self.use_database: self.api = MusicBrainzAPIClient(base_url) def _clean_artist_name(self, artist_name: str) -> str: @@ -73,49 +73,22 @@ class MusicBrainzCleaner: def find_artist_mbid(self, artist_name: str) -> Optional[str]: clean_name = self._clean_artist_name(artist_name) - # Load known artists data first - known_artists = data_loader.load_known_artists() + # Handle collaborations - prioritize finding artist credit + # Use the same complex collaboration parsing as the database + main_artist, collaborators = self._parse_complex_collaboration(clean_name) + has_collaboration = len(collaborators) > 0 - # Check known artists first - for known_name, mbid in known_artists.items(): - if clean_name.lower() == known_name.lower(): - return mbid - - # Handle collaborations - try main artist first - if 'feat.' in clean_name.lower() or 'ft.' in artist_name.lower(): - # Extract main artist (before feat.) - main_artist = re.split(r'\s+feat\.?\s+', clean_name, flags=re.IGNORECASE)[0].strip() - - # Also try with original ft. pattern - if 'ft.' 
in artist_name.lower(): - main_artist_ft = re.split(r'\s+ft\.\s+', artist_name, flags=re.IGNORECASE)[0].strip() - if main_artist_ft: - main_artist = main_artist_ft - - # Check known artists for main artist - for known_name, mbid in known_artists.items(): - if main_artist.lower() == known_name.lower(): - return mbid - - # Try fuzzy search for main artist - if self.use_database: - result = self.db.fuzzy_search_artist(main_artist) - if result: - return result[1] # Return MBID - else: - # Fallback to API - try: - result = self.api.search_artist(main_artist) - if result: - return result['id'] - except: - pass + if has_collaboration: + # For collaborations, we'll handle this in find_recording_mbid + # by using the artist credit approach + # Return None here so we can find the full collaboration later + return None # Try fuzzy search for full artist name if self.use_database: result = self.db.fuzzy_search_artist(clean_name) - if result: - return result[1] # Return MBID + if result and isinstance(result, tuple) and len(result) >= 2: + return result[1] # Return MBID from tuple (artist_name, mbid, score) else: # Fallback to API try: @@ -135,30 +108,28 @@ class MusicBrainzCleaner: # Check known recordings first for (known_artist_mbid, known_title), recording_mbid in known_recordings.items(): - if artist_mbid == known_artist_mbid and clean_title.lower() in known_title.lower(): + if artist_mbid == known_artist_mbid and clean_title.lower() == known_title.lower(): return recording_mbid - # Try fuzzy search with artist MBID + # Handle collaborations using artist credit if self.use_database: - result = self.db.fuzzy_search_recording(clean_title, artist_mbid) - if result: - return result[1] # Return MBID - - # Try broader search without artist constraint - result = self.db.fuzzy_search_recording(clean_title) - if result: - return result[1] # Return MBID + # If no artist_mbid (collaboration case), try to find by title and original artist name + if not artist_mbid: + # This is a 
collaboration case, try to find by title + result = self.db.fuzzy_search_recording(clean_title) + if result and isinstance(result, tuple) and len(result) >= 2: + return result[1] # Return MBID from tuple (recording_name, mbid, score) + else: + # Regular case with artist_mbid + result = self.db.fuzzy_search_recording(clean_title, artist_mbid) + if result and isinstance(result, tuple) and len(result) >= 2: + return result[1] # Return MBID from tuple (recording_name, mbid, score) else: # Fallback to API try: result = self.api.search_recording(clean_title, artist_mbid) if result: return result['id'] - - # Try broader search without artist constraint - result = self.api.search_recording(clean_title) - if result: - return result['id'] except: pass @@ -185,12 +156,165 @@ class MusicBrainzCleaner: if credit.get('joinphrase'): parts.append(credit['joinphrase']) return ''.join(parts).strip() + + def _parse_complex_collaboration(self, artist_string: str) -> Tuple[str, List[str]]: + """ + Parse complex collaboration strings like "Pitbull ft. 
Ne-Yo, Afrojack & Nayer" + Returns: (main_artist, [collaborators]) + """ + import re + + # Primary collaboration indicators + primary_patterns = ['ft.', 'feat.', 'featuring'] + + # Secondary collaboration indicators (need more careful handling) + secondary_patterns = ['&', 'and'] + + # Check if this is a collaboration + is_collaboration = False + split_pattern = None + + for pattern in primary_patterns: + if pattern.lower() in artist_string.lower(): + is_collaboration = True + # Find the actual pattern in the original string (case-insensitive) + import re + match = re.search(re.escape(pattern), artist_string, re.IGNORECASE) + if match: + split_pattern = match.group(0) + else: + split_pattern = pattern + break + + # If no primary collaboration found, check secondary patterns + if not is_collaboration: + for pattern in secondary_patterns: + if pattern.lower() in artist_string.lower(): + # For secondary patterns, be more careful + # Check if this looks like a band name vs collaboration + parts = artist_string.split(pattern) + if len(parts) == 2: + part1 = parts[0].strip() + part2 = parts[1].strip() + + # Check if this looks like a band name vs collaboration + # Band names typically have simple, short parts + # Collaborations often have more complex artist names + part1_words = len(part1.split()) + part2_words = len(part2.split()) + + # Check if this looks like a band name vs collaboration + # Band names typically have simple, short parts + # Collaborations often have more complex artist names + + # If one part has a comma, it's likely a collaboration + if ',' in part1 or ',' in part2: + is_collaboration = True + split_pattern = pattern + break + + # For "&" separators, be more permissive + # Most "&" separators in music are collaborations, not band names + # Only treat very obvious band names as non-collaborations + if part1_words == 1 and part2_words == 1: + # Check for very obvious band name patterns + obvious_band_names = ['simon & garfunkel', 'hall & oates', 
'brooks & dunn'] + if artist_string.lower() in obvious_band_names: + # Likely a band name, skip + continue + + # Likely a collaboration + is_collaboration = True + split_pattern = pattern + break + + if not is_collaboration: + return (artist_string, []) + + # Split on the pattern + parts = artist_string.split(split_pattern) + if len(parts) < 2: + return (artist_string, []) + + main_artist = parts[0].strip() + collaborators_string = split_pattern.join(parts[1:]).strip() + + # Now parse the collaborators string which might have multiple separators + collaborators = self._parse_collaborators(collaborators_string) + + return (main_artist, collaborators) + + def _parse_collaborators(self, collaborators_string: str) -> List[str]: + """ + Parse a string like "Ne-Yo, Afrojack & Nayer" into individual artists + """ + import re + + if not collaborators_string: + return [] + + # Split on common separators: comma, ampersand, "and" + # Use regex to handle multiple separators + separators = r'[,&]|\band\b' + + # Split and clean up each part + parts = re.split(separators, collaborators_string, flags=re.IGNORECASE) + + collaborators = [] + for part in parts: + part = part.strip() + if part: # Skip empty parts + collaborators.append(part) + + return collaborators def clean_song(self, song: Dict[str, Any]) -> Dict[str, Any]: print(f"Processing: {song.get('artist', 'Unknown')} - {song.get('title', 'Unknown')}") # Find artist MBID artist_mbid = self.find_artist_mbid(song.get('artist', '')) + + # Handle collaboration case (artist_mbid is None) + # Use the same complex collaboration parsing as the database + artist_name = song.get('artist', '') + main_artist, collaborators = self._parse_complex_collaboration(artist_name) + has_collaboration = len(collaborators) > 0 + + if artist_mbid is None and has_collaboration: + print(f" 🎯 Collaboration detected: {song.get('artist')}") + # Try to find recording using artist credit approach + if self.use_database: + result = 
self.db.find_artist_credit(song.get('artist', ''), song.get('title', '')) + if result: + artist_credit_id, artist_string, recording_mbid = result + print(f" ✅ Found recording: {song.get('title')} (MBID: {recording_mbid})") + + # Update with the correct artist credit + song['artist'] = artist_string + song['recording_mbid'] = recording_mbid + print(f" ✅ Updated to: {song['artist']} - {song.get('title')}") + return song + else: + print(f" ❌ Could not find recording: {song.get('title')}") + return song + else: + # Fallback to API method + recording_mbid = self.find_recording_mbid(None, song.get('title', '')) + if recording_mbid: + recording_info = self.get_recording_info(recording_mbid) + if recording_info and recording_info.get('artist-credit'): + artist_string = self._build_artist_string(recording_info['artist-credit']) + if artist_string: + song['artist'] = artist_string + print(f" ✅ Updated to: {song['artist']} - {recording_info['title']}") + song['title'] = recording_info['title'] + song['recording_mbid'] = recording_mbid + return song + else: + print(f" ❌ Could not find recording: {song.get('title')}") + return song + + # Regular case (non-collaboration or collaboration not found) if not artist_mbid: print(f" ❌ Could not find artist: {song.get('artist')}") return song @@ -213,13 +337,19 @@ class MusicBrainzCleaner: if recording_info: print(f" ✅ Found recording: {recording_info['title']} (MBID: {recording_mbid})") - # Update artist string if there are multiple artists + # Update artist string if there are multiple artists, but preserve the artist MBID if self.use_database and recording_info.get('artist_credit'): song['artist'] = recording_info['artist_credit'] + # Keep the original artist MBID even when updating artist name + if 'mbid' not in song: + song['mbid'] = artist_mbid elif not self.use_database and recording_info.get('artist-credit'): artist_string = self._build_artist_string(recording_info['artist-credit']) if artist_string: song['artist'] = 
artist_string + # Keep the original artist MBID even when updating artist name + if 'mbid' not in song: + song['mbid'] = artist_mbid song['title'] = recording_info['title'] song['recording_mbid'] = recording_mbid diff --git a/src/config/constants.py b/src/config/constants.py index 2d4e065..5d5a404 100644 --- a/src/config/constants.py +++ b/src/config/constants.py @@ -13,9 +13,9 @@ REQUEST_TIMEOUT = 30 # Database Configuration (Primary - Direct PostgreSQL access) # Note: For Docker setups, database port needs to be exposed to host -DB_HOST = "localhost" # Will try to connect via localhost +DB_HOST = "172.18.0.2" # Docker container IP that works DB_PORT = 5432 -DB_NAME = "musicbrainz" +DB_NAME = "musicbrainz_db" # Updated to match actual database name DB_USER = "musicbrainz" DB_PASSWORD = "musicbrainz" # Default password, should be overridden DB_CONNECTION_TIMEOUT = 30 diff --git a/src/tests/README.md b/src/tests/README.md index 709b4f6..47c5d5f 100644 --- a/src/tests/README.md +++ b/src/tests/README.md @@ -1,46 +1,131 @@ -# Tests +# MusicBrainz Data Cleaner - Tests -This directory contains unit tests for the MusicBrainz Data Cleaner project. +This directory contains all tests for the MusicBrainz Data Cleaner project, organized by type. 
-## Test Structure +## 📁 Test Structure -- `test_data_loader.py` - Tests for the data loader utility -- `test_cli.py` - Tests for CLI functionality -- `run_tests.py` - Test runner script +``` +src/tests/ +├── unit/ # Unit tests for individual components +├── integration/ # Integration tests for database and API +├── debug/ # Debug scripts and troubleshooting tests +├── run_tests.py # Test runner script +└── README.md # This file +``` -## Running Tests +## 🧪 Test Categories + +### Unit Tests (`unit/`) +- **Purpose**: Test individual components in isolation +- **Examples**: + - `test_data_loader.py` - Test data loading functionality + - `test_collaboration_patterns.py` - Test collaboration detection + - `test_hyphenated_artists.py` - Test artist name variations + - `test_eazy_e.py` - Test specific edge cases + +### Integration Tests (`integration/`) +- **Purpose**: Test interactions between components +- **Examples**: + - `test_cli.py` - Test command-line interface + - `direct_db_test.py` - Test database connectivity + - `test_db_connection.py` - Test database queries + +### Debug Tests (`debug/`) +- **Purpose**: Debug scripts and troubleshooting tools +- **Examples**: + - `debug_collaboration.py` - Debug collaboration parsing + - `simple_debug.py` - Simple debugging utilities + - `check_collaboration.py` - Check collaboration handling + +## 🚀 Running Tests ### Run All Tests ```bash python3 src/tests/run_tests.py ``` +### Run Specific Test Categories +```bash +# Run only unit tests +python3 src/tests/run_tests.py --unit + +# Run only integration tests +python3 src/tests/run_tests.py --integration +``` + ### Run Specific Test Module ```bash +# Run a specific test file python3 src/tests/run_tests.py test_data_loader +python3 src/tests/run_tests.py test_collaboration_patterns python3 src/tests/run_tests.py test_cli ``` -### Run Individual Test Files +### List Available Tests ```bash -python3 -m unittest src/tests/test_data_loader.py -python3 -m unittest 
src/tests/test_cli.py +python3 src/tests/run_tests.py --list ``` -## Test Guidelines +## 📋 Test Data Files -1. **Use Temporary Files**: All tests should use temporary directories/files that are cleaned up after tests -2. **Mock External Dependencies**: Mock API calls and external services to avoid network dependencies -3. **Test Edge Cases**: Include tests for error conditions and edge cases -4. **Descriptive Names**: Use descriptive test method names that explain what is being tested -5. **Isolated Tests**: Each test should be independent and not rely on other tests +Some tests use JSON data files for testing: +- `unit/test_aliases.json` - Test data for artist aliases +- `unit/test_sclub7.json` - Test data for name variations +- `unit/test_aliases_cleaned.json` - Expected output for alias tests +- `unit/test_sclub7_cleaned.json` - Expected output for name variation tests -## Adding New Tests +## 🔧 Test Requirements -When adding new functionality, create corresponding test files following the naming convention: -- `test_.py` for module tests -- Use descriptive test method names: `test__` +- **Database**: Some tests require a running MusicBrainz database +- **Dependencies**: All Python dependencies must be installed +- **Environment**: Tests should be run from the project root directory -## Test Data +## 📝 Writing New Tests -Test data should be created programmatically in the test setup rather than using static files. This ensures tests are self-contained and don't depend on external files. 
\ No newline at end of file +### Unit Tests +- Place in `unit/` directory +- Test individual functions or classes +- Use mock data when possible +- Follow naming convention: `test_*.py` + +### Integration Tests +- Place in `integration/` directory +- Test component interactions +- May require database connection +- Follow naming convention: `test_*.py` + +### Debug Scripts +- Place in `debug/` directory +- Use for troubleshooting specific issues +- Can be temporary or permanent +- Follow naming convention: `debug_*.py` or `check_*.py` + +## 🐛 Debugging Tests + +If tests fail: + +1. **Check database connection**: Ensure MusicBrainz database is running +2. **Check dependencies**: Ensure all requirements are installed +3. **Check environment**: Ensure you're running from the correct directory +4. **Use debug scripts**: Run debug scripts in `debug/` directory for troubleshooting + +## 📊 Test Coverage + +The test suite covers: +- ✅ Data loading and validation +- ✅ Artist name normalization +- ✅ Collaboration detection +- ✅ Database connectivity +- ✅ CLI functionality +- ✅ Edge cases and error handling +- ✅ Fuzzy search algorithms +- ✅ Recording count prioritization + +## 🔄 Continuous Integration + +Tests are automatically run: +- On pull requests +- Before releases +- During development + +All tests must pass before code is merged. \ No newline at end of file diff --git a/src/tests/debug/check_collaboration.py b/src/tests/debug/check_collaboration.py new file mode 100644 index 0000000..5c93891 --- /dev/null +++ b/src/tests/debug/check_collaboration.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +""" +Check how MusicBrainz stores collaboration information. 
+""" + +import psycopg2 +import psycopg2.extras + +# Database configuration +DB_HOST = "172.18.0.2" +DB_PORT = 5432 +DB_NAME = "musicbrainz_db" +DB_USER = "musicbrainz" +DB_PASSWORD = "musicbrainz" + +def check_collaboration(): + """Check how Afrojack and Chris Brown collaboration is stored.""" + try: + connection = psycopg2.connect( + host=DB_HOST, + port=DB_PORT, + database=DB_NAME, + user=DB_USER, + password=DB_PASSWORD, + connect_timeout=10 + ) + cursor = connection.cursor(cursor_factory=psycopg2.extras.RealDictCursor) + + print("🔍 Checking Afrojack and Chris Brown collaboration...") + print("=" * 60) + + # Find Afrojack + cursor.execute("SELECT id, name, gid FROM artist WHERE name = 'Afrojack' LIMIT 1") + afrojack = cursor.fetchone() + + # Find Chris Brown + cursor.execute("SELECT id, name, gid FROM artist WHERE name = 'Chris Brown' LIMIT 1") + chris_brown = cursor.fetchone() + + if afrojack and chris_brown: + print(f"✅ Found Afrojack: {afrojack['name']} (ID: {afrojack['id']}, MBID: {afrojack['gid']})") + print(f"✅ Found Chris Brown: {chris_brown['name']} (ID: {chris_brown['id']}, MBID: {chris_brown['gid']})") + + # Look for recordings with both artists + query = """ + SELECT DISTINCT r.name as recording_name, r.gid as recording_mbid, ac.id as artist_credit_id + FROM recording r + JOIN artist_credit ac ON r.artist_credit = ac.id + JOIN artist_credit_name acn1 ON ac.id = acn1.artist_credit + JOIN artist_credit_name acn2 ON ac.id = acn2.artist_credit + JOIN artist a1 ON acn1.artist = a1.id + JOIN artist a2 ON acn2.artist = a2.id + WHERE a1.name = 'Afrojack' + AND a2.name = 'Chris Brown' + AND r.name ILIKE '%As Your Friend%' + LIMIT 5 + """ + cursor.execute(query) + collaborations = cursor.fetchall() + + print(f"\n🎵 Found {len(collaborations)} collaborations:") + for collab in collaborations: + print(f" Recording: {collab['recording_name']}") + print(f" Recording MBID: {collab['recording_mbid']}") + print(f" Artist Credit ID: {collab['artist_credit_id']}") + + # 
Get all artists in this collaboration + artist_query = """ + SELECT a.name, acn.join_phrase, acn.position + FROM artist_credit_name acn + JOIN artist a ON acn.artist = a.id + WHERE acn.artist_credit = %s + ORDER BY acn.position + """ + cursor.execute(artist_query, (collab['artist_credit_id'],)) + artists = cursor.fetchall() + + print(" Artists:") + for artist in artists: + join_phrase = artist['join_phrase'] if artist['join_phrase'] else "" + print(f" {artist['name']}{join_phrase}") + print() + + cursor.close() + connection.close() + + except Exception as e: + print(f"❌ Error: {e}") + +if __name__ == "__main__": + check_collaboration() \ No newline at end of file diff --git a/src/tests/debug/debug_collaboration.py b/src/tests/debug/debug_collaboration.py new file mode 100644 index 0000000..fb7b17b --- /dev/null +++ b/src/tests/debug/debug_collaboration.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 + +import sys +import os +sys.path.insert(0, '/app') + +from src.api.database import MusicBrainzDatabase + +def debug_collaboration(): + """Debug the collaboration parsing and database queries.""" + + db = MusicBrainzDatabase() + if not db.connect(): + print("❌ Failed to connect to database") + return + + print("✅ Connected to database") + + # Test cases that are failing + test_cases = [ + ("Pitbull ft. Ne-Yo, Afrojack & Nayer", "Give Me Everything"), + ("Aretha Franklin ft. G. 
Michael", "I Knew You Were Waiting For Me"), + ] + + for artist_name, title in test_cases: + print(f"\n🔍 Testing: {artist_name} - {title}") + print("=" * 60) + + # Test collaboration parsing + main_artist, collaborators = db._parse_complex_collaboration(artist_name) + print(f"Main artist: '{main_artist}'") + print(f"Collaborators: {collaborators}") + + # Test artist credit finding + result = db.find_artist_credit(artist_name, title) + if result: + artist_credit_id, artist_string, recording_mbid = result + print(f"✅ Found artist credit: {artist_string}") + print(f"Recording MBID: {recording_mbid}") + else: + print("❌ No artist credit found") + + # Try to find the recording directly + print("\n🔍 Trying to find recording directly...") + recording_result = db.fuzzy_search_recording(title) + if recording_result: + recording_name, recording_mbid, score = recording_result + print(f"Found recording: {recording_name} (score: {score})") + + # Get recording info + recording_info = db.get_recording_info(recording_mbid) + if recording_info: + print(f"Recording artist: {recording_info.get('artist_credit', 'Unknown')}") + else: + print("❌ No recording found") + +if __name__ == "__main__": + debug_collaboration() \ No newline at end of file diff --git a/src/tests/debug/debug_connection.py b/src/tests/debug/debug_connection.py new file mode 100644 index 0000000..e9c2f30 --- /dev/null +++ b/src/tests/debug/debug_connection.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 +""" +Debug script to test database connection and see what's actually happening. +""" + +import sys +import os + +# Add the src directory to the path +sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src')) + +from api.database import MusicBrainzDatabase +from api.api_client import MusicBrainzAPIClient + +def test_connections(): + """Test both database and API connections.""" + print("🧪 Debugging MusicBrainz Connections") + print("=" * 50) + + # Test database connection + print("\n1. 
Testing Database Connection...") + db = MusicBrainzDatabase() + db_connected = db.connect() + print(f"Database connection result: {db_connected}") + + if db_connected: + # Test a simple query + try: + cursor = db.cursor + cursor.execute("SELECT COUNT(*) as count FROM artist") + count = cursor.fetchone()['count'] + print(f"✅ Database query successful: {count:,} artists found") + except Exception as e: + print(f"❌ Database query failed: {e}") + finally: + db.disconnect() + + # Test API connection + print("\n2. Testing API Connection...") + api = MusicBrainzAPIClient() + api_connected = api.test_connection() + print(f"API connection result: {api_connected}") + + # Test cleaner initialization + print("\n3. Testing Cleaner Initialization...") + from cli.main import MusicBrainzCleaner + + cleaner = MusicBrainzCleaner(use_database=True) + print(f"Cleaner use_database flag: {cleaner.use_database}") + print(f"Cleaner has db attribute: {hasattr(cleaner, 'db')}") + print(f"Cleaner has api attribute: {hasattr(cleaner, 'api')}") + + if hasattr(cleaner, 'db') and cleaner.db: + print(f"Database connection status: {cleaner.db.connection is not None}") + + return db_connected, api_connected + +if __name__ == "__main__": + db_ok, api_ok = test_connections() + + print(f"\n📊 Summary:") + print(f"Database connection: {'✅ OK' if db_ok else '❌ FAILED'}") + print(f"API connection: {'✅ OK' if api_ok else '❌ FAILED'}") + + if not db_ok and not api_ok: + print("\n❌ Both connections failed!") + sys.exit(1) + elif not db_ok: + print("\n⚠️ Database failed, but API is available") + else: + print("\n✅ Database connection is working!") \ No newline at end of file diff --git a/src/tests/debug/debug_test.py b/src/tests/debug/debug_test.py new file mode 100644 index 0000000..82be412 --- /dev/null +++ b/src/tests/debug/debug_test.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 +""" +Debug script to identify the tuple error. 
+""" + +import sys +import os +sys.path.append('/app/src') + +# Add the src directory to Python path +sys.path.insert(0, '/app/src') + +from cli.main import MusicBrainzCleaner + +def debug_test(): + """Test the cleaner step by step to find the tuple error.""" + print("🔍 Debugging MusicBrainz Cleaner...") + + # Initialize cleaner + cleaner = MusicBrainzCleaner(use_database=True) + + # Test with a simple song + test_song = { + 'artist': 'AC/DC', + 'title': 'Shot In The Dark' + } + + print(f"Testing with: {test_song}") + + try: + # Test _clean_artist_name + print("1. Testing _clean_artist_name...") + clean_artist = cleaner._clean_artist_name(test_song['artist']) + print(f" Clean artist: {clean_artist} (type: {type(clean_artist)})") + + # Test find_artist_mbid + print("2. Testing find_artist_mbid...") + artist_mbid = cleaner.find_artist_mbid(test_song['artist']) + print(f" Artist MBID: {artist_mbid} (type: {type(artist_mbid)})") + + if artist_mbid: + # Test get_artist_info + print("3. Testing get_artist_info...") + artist_info = cleaner.get_artist_info(artist_mbid) + print(f" Artist info: {artist_info} (type: {type(artist_info)})") + + # Test find_recording_mbid + print("4. Testing find_recording_mbid...") + recording_mbid = cleaner.find_recording_mbid(artist_mbid, test_song['title']) + print(f" Recording MBID: {recording_mbid} (type: {type(recording_mbid)})") + + if recording_mbid: + # Test get_recording_info + print("5. 
Testing get_recording_info...") + recording_info = cleaner.get_recording_info(recording_mbid) + print(f" Recording info: {recording_info} (type: {type(recording_info)})") + + except Exception as e: + print(f"❌ Error: {e}") + import traceback + traceback.print_exc() + +if __name__ == "__main__": + debug_test() \ No newline at end of file diff --git a/src/tests/debug/minimal_test.py b/src/tests/debug/minimal_test.py new file mode 100644 index 0000000..873d9d8 --- /dev/null +++ b/src/tests/debug/minimal_test.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 +""" +Minimal test to isolate the tuple error. +""" + +import json +from pathlib import Path + +def test_minimal(): + """Test the minimal functionality.""" + print("🔍 Testing minimal functionality...") + + try: + # Test 1: Load known artists + print("1. Loading known artists...") + data_dir = Path("data") + artists_file = data_dir / "known_artists.json" + + with open(artists_file, 'r', encoding='utf-8') as f: + data = json.load(f) + artists = data.get('artists', {}) + + print(f" Artists type: {type(artists)}") + print(f" Artists keys: {list(artists.keys())[:3]}") + + # Test 2: Check for AC/DC + print("\n2. Checking for AC/DC...") + if 'AC/DC' in artists: + mbid = artists['AC/DC'] + print(f" AC/DC MBID: {mbid} (type: {type(mbid)})") + + # Test 3: Try to call .lower() on the MBID + print("\n3. Testing .lower() on MBID...") + try: + lower_mbid = mbid.lower() + print(f" Lower MBID: {lower_mbid}") + except Exception as e: + print(f" Error calling .lower(): {e}") + + # Test 4: Check all values + print("\n4. 
def test_connection():
    """Try each known database endpoint until one answers the smoke queries.

    For every candidate host the function connects, counts the rows in
    ``artist`` and looks up 'The Beatles'.

    Returns:
        bool: True as soon as one endpoint connects and answers both
        queries; False if every endpoint fails.
    """
    # Settings shared by every endpoint; only host/description differ.
    common = {
        'port': 5432,
        'database': 'musicbrainz_db',
        'user': 'musicbrainz',
        'password': 'musicbrainz',
    }
    test_configs = [
        {'host': 'db', 'description': 'Docker service name', **common},
        {'host': '172.18.0.2', 'description': 'Docker container IP', **common},
        {'host': 'localhost', 'description': 'Localhost', **common},
    ]

    for config in test_configs:
        print(f"\nTesting connection to {config['host']} ({config['description']})...")
        conn = None
        try:
            conn = psycopg2.connect(
                host=config['host'],
                port=config['port'],
                database=config['database'],
                user=config['user'],
                password=config['password'],
                connect_timeout=5
            )

            print(f"✅ Successfully connected to {config['host']}")

            cursor = conn.cursor()
            try:
                # Test a simple query
                cursor.execute("SELECT COUNT(*) FROM artist")
                count = cursor.fetchone()[0]
                print(f"📊 Found {count:,} artists in database")

                # Test a specific artist
                cursor.execute("SELECT name FROM artist WHERE name = 'The Beatles' LIMIT 1")
                result = cursor.fetchone()
                if result:
                    print(f"🎵 Found artist: {result[0]}")
                else:
                    print("❌ The Beatles not found")
            finally:
                cursor.close()

            return True

        except Exception as e:
            # Deliberately broad: this is a probe script, any failure just
            # means "try the next endpoint".
            print(f"❌ Failed to connect to {config['host']}: {e}")
        finally:
            # Previously the connection was closed only on the success path,
            # so a failing query leaked it.
            if conn is not None:
                conn.close()

    return False
Testing recording search for 'Shot In The Dark'...") + query = """ + SELECT r.name as title, r.gid as mbid + FROM recording r + JOIN artist_credit_name acn ON r.artist_credit = acn.artist_credit + JOIN artist a ON acn.artist = a.id + WHERE a.gid = %s AND r.name ILIKE %s + LIMIT 1 + """ + cursor.execute(query, (artist_mbid, "%Shot In The Dark%")) + recording_result = cursor.fetchone() + print(f" Result: {recording_result}") + print(f" Type: {type(recording_result)}") + + if recording_result: + recording_mbid = recording_result['mbid'] + print(f" Recording MBID: {recording_mbid}") + + # Test 3: Get recording info with artist credit + print("\n3. Testing recording info with artist credit...") + query = """ + SELECT r.name as title, r.gid as mbid, ac.id as artist_credit_id + FROM recording r + JOIN artist_credit ac ON r.artist_credit = ac.id + WHERE r.gid = %s + LIMIT 1 + """ + cursor.execute(query, (recording_mbid,)) + info_result = cursor.fetchone() + print(f" Result: {info_result}") + print(f" Type: {type(info_result)}") + + if info_result: + # Test 4: Get artist credit details + print("\n4. 
Testing artist credit details...") + artist_query = """ + SELECT a.name, acn.join_phrase, acn.position + FROM artist_credit_name acn + JOIN artist a ON acn.artist = a.id + WHERE acn.artist_credit = %s + ORDER BY acn.position + """ + cursor.execute(artist_query, (info_result['artist_credit_id'],)) + artists = cursor.fetchall() + print(f" Artists: {artists}") + print(f" Type: {type(artists)}") + + # Build artist string + artist_string = "" + for artist in artists: + artist_string += artist['name'] + if artist['join_phrase']: + artist_string += artist['join_phrase'] + + print(f" Artist string: {artist_string}") + + cursor.close() + connection.close() + + except Exception as e: + print(f"❌ Error: {e}") + import traceback + traceback.print_exc() + +if __name__ == "__main__": + test_database_queries() \ No newline at end of file diff --git a/src/tests/debug/simple_test.py b/src/tests/debug/simple_test.py new file mode 100644 index 0000000..13241d0 --- /dev/null +++ b/src/tests/debug/simple_test.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 +""" +Simple test for musicbrainz-cleaner database connection. 
+""" + +import psycopg2 +import psycopg2.extras +from fuzzywuzzy import fuzz + +# Database configuration +DB_HOST = "172.18.0.2" # Docker container IP that works +DB_PORT = 5432 +DB_NAME = "musicbrainz_db" +DB_USER = "musicbrainz" +DB_PASSWORD = "musicbrainz" +DB_CONNECTION_TIMEOUT = 30 + +def test_database_connection(): + """Test database connection and fuzzy search.""" + print("🧪 Testing MusicBrainz Cleaner Database Connection") + print("=" * 50) + + try: + # Connect to database + connection = psycopg2.connect( + host=DB_HOST, + port=DB_PORT, + database=DB_NAME, + user=DB_USER, + password=DB_PASSWORD, + connect_timeout=DB_CONNECTION_TIMEOUT + ) + + cursor = connection.cursor(cursor_factory=psycopg2.extras.RealDictCursor) + print("✅ Connected to MusicBrainz database") + + # Test basic query + cursor.execute("SELECT COUNT(*) as count FROM artist") + count = cursor.fetchone()['count'] + print(f"📊 Found {count:,} artists in database") + + # Test artist search + print("\n🔍 Testing artist search...") + cursor.execute("SELECT name FROM artist WHERE name = 'The Beatles' LIMIT 1") + result = cursor.fetchone() + if result: + print(f"✅ Found artist: {result['name']}") + + # Test recording search + print("\n🎵 Testing recording search...") + cursor.execute(""" + SELECT r.name as recording_name, r.gid as recording_mbid + FROM recording r + JOIN artist_credit_name acn ON r.artist_credit = acn.artist_credit + JOIN artist a ON acn.artist = a.id + WHERE a.name = 'The Beatles' AND r.name ILIKE '%Hey Jude%' + LIMIT 1 + """) + recording_result = cursor.fetchone() + if recording_result: + print(f"✅ Found recording: {recording_result['recording_name']} (MBID: {recording_result['recording_mbid']})") + else: + print("❌ Recording not found") + else: + print("❌ Artist not found") + + # Test fuzzy search + print("\n🔍 Testing fuzzy search...") + test_artist = "ACDC" + cursor.execute("SELECT name FROM artist LIMIT 1000") + artists = [row['name'] for row in cursor.fetchall()] + + best_match = 
def find_artist(artist_name, cursor):
    """Find an artist by name in the database.

    The lookup is attempted in three stages: exact name match,
    case-insensitive match, then a fuzzy comparison over up to 50
    ``ILIKE`` candidates (accepted at a ratio of 80 or more).

    Args:
        artist_name: Raw artist string, possibly a collaboration such as
            "Rihanna ft. Drake"; only the part before the first
            ``ft.``/``feat.`` marker is searched.
        cursor: Database cursor whose rows support access by column name
            (``row['name']``, ``row['gid']``).

    Returns:
        tuple: ``(name, gid)`` of the matched artist, or ``(None, None)``.
    """
    # Imported locally: the module header imports json/psycopg2/fuzz only.
    import re

    clean_name = artist_name.strip()

    # Handle collaborations - extract main artist.  The marker is matched
    # case-insensitively *and* split with the same pattern; previously the
    # check used .lower() but the split was case-sensitive, so "Ft."/"FEAT."
    # were detected yet never removed.
    main_artist = re.split(
        r'\b(?:ft|feat)\.', clean_name, maxsplit=1, flags=re.IGNORECASE
    )[0].strip()
    if not main_artist:
        # Name started with the marker itself; fall back to the full string.
        main_artist = clean_name

    # Try exact match first
    cursor.execute("SELECT name, gid FROM artist WHERE name = %s LIMIT 1", (main_artist,))
    result = cursor.fetchone()
    if result:
        return result['name'], result['gid']

    # Try case-insensitive match
    cursor.execute("SELECT name, gid FROM artist WHERE LOWER(name) = LOWER(%s) LIMIT 1", (main_artist,))
    result = cursor.fetchone()
    if result:
        return result['name'], result['gid']

    # Try fuzzy search
    cursor.execute("SELECT name, gid FROM artist WHERE name ILIKE %s LIMIT 50", (f"%{main_artist}%",))
    artists = cursor.fetchall()

    best_match = None
    best_score = 0
    wanted = main_artist.lower()

    for artist in artists:
        score = fuzz.ratio(wanted, artist['name'].lower())
        if score > best_score:
            best_score = score
            best_match = artist

    if best_score >= 80:
        return best_match['name'], best_match['gid']

    return None, None
def process_songs():
    """Process songs from songs_20.json using direct database access.

    Loads ``data/songs_20.json`` (relative to the current working
    directory), then for each entry resolves the artist and, when found,
    the recording, printing the outcome of every lookup.  Returns None.
    """
    print("🎵 Processing songs with direct database access")
    print("=" * 60)

    # Connect to database
    connection, cursor = connect_to_database()
    if not connection:
        return

    try:
        # JSON is UTF-8 by specification; opening without an explicit
        # encoding used the platform default and could mis-decode
        # non-ASCII artist names.
        with open('data/songs_20.json', 'r', encoding='utf-8') as f:
            songs = json.load(f)

        total = len(songs)
        print(f"📁 Loaded {total} songs from songs_20.json")
        print()

        # Process each song
        for i, song in enumerate(songs, 1):
            artist_name = song.get('artist', '')
            title = song.get('title', '')

            print(f"[{i:2d}/{total}] {artist_name} - {title}")

            # Find artist
            found_artist, artist_mbid = find_artist(artist_name, cursor)
            if found_artist:
                print(f" ✅ Artist: {found_artist} (MBID: {artist_mbid})")

                # Find recording
                found_recording, recording_mbid = find_recording(title, artist_mbid, cursor)
                if found_recording:
                    print(f" ✅ Recording: {found_recording} (MBID: {recording_mbid})")
                else:
                    print(f" ❌ Recording not found for: {title}")
            else:
                print(f" ❌ Artist not found for: {artist_name}")

            print()

        print("🎉 Processing complete!")

    except Exception as e:
        # Best-effort script: report and fall through to cleanup.
        print(f"❌ Error processing songs: {e}")
    finally:
        cursor.close()
        connection.close()
@@ -26,10 +26,11 @@ def run_all_tests(): return result.wasSuccessful() -def run_specific_test(test_module): - """Run a specific test module.""" +def run_unit_tests(): + """Run only unit tests.""" loader = unittest.TestLoader() - suite = loader.loadTestsFromName(f'tests.{test_module}') + unit_dir = os.path.join(os.path.dirname(__file__), 'unit') + suite = loader.discover(unit_dir, pattern='test_*.py') runner = unittest.TextTestRunner(verbosity=2) result = runner.run(suite) @@ -37,13 +38,82 @@ def run_specific_test(test_module): return result.wasSuccessful() +def run_integration_tests(): + """Run only integration tests.""" + loader = unittest.TestLoader() + integration_dir = os.path.join(os.path.dirname(__file__), 'integration') + suite = loader.discover(integration_dir, pattern='test_*.py') + + runner = unittest.TextTestRunner(verbosity=2) + result = runner.run(suite) + + return result.wasSuccessful() + + +def run_specific_test(test_module): + """Run a specific test module.""" + loader = unittest.TestLoader() + + # Try to find the test in different directories + test_dirs = ['', 'unit', 'integration', 'debug'] + suite = None + + for test_dir in test_dirs: + try: + if test_dir: + suite = loader.loadTestsFromName(f'tests.{test_dir}.{test_module}') + else: + suite = loader.loadTestsFromName(f'tests.{test_module}') + break + except: + continue + + if suite is None: + print(f"❌ Test module '{test_module}' not found") + return False + + runner = unittest.TextTestRunner(verbosity=2) + result = runner.run(suite) + + return result.wasSuccessful() + + +def list_available_tests(): + """List all available test modules.""" + test_dirs = { + 'unit': 'Unit tests for individual components', + 'integration': 'Integration tests for database and API', + 'debug': 'Debug scripts and troubleshooting tests' + } + + print("📋 Available test categories:") + for test_dir, description in test_dirs.items(): + dir_path = os.path.join(os.path.dirname(__file__), test_dir) + if 
os.path.exists(dir_path): + tests = [f for f in os.listdir(dir_path) if f.startswith('test_') and f.endswith('.py')] + if tests: + print(f"\n🔧 {test_dir.upper()} - {description}") + for test in tests: + print(f" - {test[:-3]}") + + if __name__ == '__main__': if len(sys.argv) > 1: - # Run specific test module - test_module = sys.argv[1] - success = run_specific_test(test_module) + command = sys.argv[1] + + if command == '--list': + list_available_tests() + sys.exit(0) + elif command == '--unit': + success = run_unit_tests() + elif command == '--integration': + success = run_integration_tests() + else: + # Run specific test module + success = run_specific_test(command) else: # Run all tests + print("🧪 Running all tests...") success = run_all_tests() sys.exit(0 if success else 1) \ No newline at end of file diff --git a/src/tests/unit/test_aliases.json b/src/tests/unit/test_aliases.json new file mode 100644 index 0000000..b7db6ae --- /dev/null +++ b/src/tests/unit/test_aliases.json @@ -0,0 +1,18 @@ +[ + { + "artist": "98 Degrees", + "disabled": false, + "favorite": false, + "guid": "test-98degrees", + "path": "test/path/98degrees.mp4", + "title": "Because of You" + }, + { + "artist": "Tony! Toni! Tone!", + "disabled": false, + "favorite": false, + "guid": "test-tony", + "path": "test/path/tony.mp4", + "title": "If I Had No Loot" + } +] \ No newline at end of file diff --git a/src/tests/unit/test_blink182.py b/src/tests/unit/test_blink182.py new file mode 100644 index 0000000..c70d2c0 --- /dev/null +++ b/src/tests/unit/test_blink182.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 +""" +Test specifically for Blink-182. 
def test_blink182():
    """Run a series of diagnostic lookups for the artist 'Blink-182'.

    Performs an exact, a case-insensitive and an ILIKE search on ``artist``
    and, if the exact search found something, lists up to five recordings
    credited to the first hit.  Results are printed; nothing is returned.
    Any error is printed with its traceback instead of being raised.
    """
    print("🔍 Testing Blink-182 specifically...")

    connection = None
    cursor = None
    try:
        connection = psycopg2.connect(
            host=DB_HOST,
            port=DB_PORT,
            database=DB_NAME,
            user=DB_USER,
            password=DB_PASSWORD,
            connect_timeout=10
        )
        cursor = connection.cursor(cursor_factory=psycopg2.extras.RealDictCursor)

        print("✅ Connected to database")

        # Test 1: Search for Blink-182
        print("\n1. Searching for 'Blink-182'...")
        cursor.execute("SELECT name, gid FROM artist WHERE name = 'Blink-182' LIMIT 5")
        blink182_results = cursor.fetchall()
        print(f" Found {len(blink182_results)} exact matches:")
        for result in blink182_results:
            print(f" {result['name']} (MBID: {result['gid']})")

        # Test 2: Case-insensitive search
        print("\n2. Case-insensitive search for 'blink-182'...")
        cursor.execute("SELECT name, gid FROM artist WHERE LOWER(name) = LOWER('Blink-182') LIMIT 5")
        blink182_ci_results = cursor.fetchall()
        print(f" Found {len(blink182_ci_results)} case-insensitive matches:")
        for result in blink182_ci_results:
            print(f" {result['name']} (MBID: {result['gid']})")

        # Test 3: Fuzzy search
        print("\n3. Fuzzy search for 'Blink-182'...")
        cursor.execute("SELECT name, gid FROM artist WHERE name ILIKE '%Blink%' OR name ILIKE '%182%' LIMIT 10")
        fuzzy_results = cursor.fetchall()
        print(f" Found {len(fuzzy_results)} fuzzy matches:")
        for result in fuzzy_results:
            print(f" {result['name']} (MBID: {result['gid']})")

        # Test 4: Search for recordings by Blink-182
        if blink182_results:
            blink182_mbid = blink182_results[0]['gid']
            print(f"\n4. Searching for recordings by Blink-182 (MBID: {blink182_mbid})...")
            cursor.execute("""
                SELECT r.name, r.gid
                FROM recording r
                JOIN artist_credit_name acn ON r.artist_credit = acn.artist_credit
                JOIN artist a ON acn.artist = a.id
                WHERE a.gid = %s
                LIMIT 5
            """, (blink182_mbid,))
            recordings = cursor.fetchall()
            print(f" Found {len(recordings)} recordings:")
            for recording in recordings:
                print(f" {recording['name']} (MBID: {recording['gid']})")

    except Exception as e:
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
    finally:
        # Cleanup used to sit at the end of the try body, so any failing
        # query skipped it and leaked the cursor and connection.
        if cursor is not None:
            cursor.close()
        if connection is not None:
            connection.close()
search...") + if result: + recording_result = db.fuzzy_search_recording("Hey Jude", result[1]) + if recording_result: + recording_title, recording_mbid, recording_score = recording_result + print(f"✅ Found recording: {recording_title} (MBID: {recording_mbid}, Score: {recording_score:.2f})") + else: + print("❌ No recording found") + + # Disconnect + db.disconnect() + + print("\n✅ MusicBrainz Cleaner database test successful!") + return True + + except Exception as e: + print(f"❌ Test failed: {e}") + return False + +if __name__ == "__main__": + success = test_cleaner_database() + if not success: + sys.exit(1) \ No newline at end of file diff --git a/src/tests/unit/test_collaboration_patterns.py b/src/tests/unit/test_collaboration_patterns.py new file mode 100644 index 0000000..fe2ebef --- /dev/null +++ b/src/tests/unit/test_collaboration_patterns.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python3 + +def test_collaboration_patterns(): + """Test collaboration pattern detection.""" + + # Primary collaboration indicators + primary_patterns = ['ft.', 'feat.', 'featuring'] + # Secondary patterns that might indicate collaboration but could also be band names + secondary_patterns = ['with', '&', 'and', ','] + + test_cases = [ + "Shawn Mendes ft. Camila Cabello", + "Rihanna ft. Drake", + "Calvin Harris ft. Ellie Goulding", + "G-Eazy & Halsey", + "Pitbull ft. Ne-Yo, Afrojack & Nayer", + "Cardi B ft. Bad Bunny, J Balvin", + "Earth, Wind & Fire", + "Simon & Garfunkel", + "Hall & Oates", + "Brooks & Dunn", + "Ariana Grande ft. Big Sean", + "Clean Bandit ft. Jess Glynne", + "P!nk ft. Nate Ruess", + "Maroon 5 ft. Cardi B", + "The Weeknd ft. Daft Punk", + "Wiz Khalifa ft. 
def parse_complex_collaboration(artist_string):
    """
    Parse complex collaboration strings like "Pitbull ft. Ne-Yo, Afrojack & Nayer"

    Only the primary markers ``ft.``, ``feat.`` and ``featuring`` (in any
    letter case) mark a collaboration; strings joined solely by ``&`` or
    ``,`` such as "Simon & Garfunkel" are returned unchanged.

    Returns: (main_artist, [collaborators])
    """
    if not artist_string:
        return (artist_string, [])

    # One pattern is used for both detection and splitting.  Previously the
    # check ran on the lower-cased string while the split used the original
    # casing, so "Rihanna Ft. Drake" was detected but never split.  The
    # leading \b keeps "ft." from matching inside another word.
    marker = re.compile(r'\b(?:featuring\b|feat\.|ft\.)', re.IGNORECASE)

    # Split on the first marker only; everything after it is collaborators.
    parts = marker.split(artist_string, maxsplit=1)
    if len(parts) < 2:
        return (artist_string, [])

    main_artist = parts[0].strip()
    collaborators_string = parts[1].strip()

    # Now parse the collaborators string which might have multiple separators
    collaborators = parse_collaborators(collaborators_string)

    return (main_artist, collaborators)


def parse_collaborators(collaborators_string):
    """
    Parse a string like "Ne-Yo, Afrojack & Nayer" into individual artists

    Splits on commas, ampersands and the standalone word "and"
    (case-insensitive); surrounding whitespace is stripped and empty
    fragments are dropped.  An empty or None input yields an empty list.
    """
    if not collaborators_string:
        return []

    # Split on common separators: comma, ampersand, "and"
    separators = r'[,&]|\band\b'
    pieces = re.split(separators, collaborators_string, flags=re.IGNORECASE)

    stripped = (piece.strip() for piece in pieces)
    return [name for name in stripped if name]
def test_connection_methods():
    """Test different connection methods.

    Tries each configured host in turn and runs a row count on ``artist``.

    Returns:
        str | None: The ``description`` of the first configuration that
        connected and answered the query, or None if all of them failed.
    """
    print("🧪 Testing Database Connection Methods")
    print("=" * 50)

    # Test parameters
    test_configs = [
        {
            'host': 'localhost',
            'port': 5432,
            'database': 'musicbrainz_db',
            'user': 'musicbrainz',
            'password': 'musicbrainz',
            'description': 'localhost'
        },
        {
            'host': '172.18.0.2',
            'port': 5432,
            'database': 'musicbrainz_db',
            'user': 'musicbrainz',
            'password': 'musicbrainz',
            'description': 'Docker container IP'
        }
    ]

    for config in test_configs:
        label = config['description']
        print(f"\nTesting {label}...")
        conn = None
        cursor = None
        try:
            conn = psycopg2.connect(
                host=config['host'],
                port=config['port'],
                database=config['database'],
                user=config['user'],
                password=config['password'],
                connect_timeout=5
            )

            cursor = conn.cursor()
            cursor.execute("SELECT COUNT(*) FROM artist")
            count = cursor.fetchone()[0]
            print(f"✅ SUCCESS: {label} - Found {count:,} artists")

            return label

        except Exception as e:
            # Broad on purpose: any failure means "try the next method".
            print(f"❌ FAILED: {label} - {e}")
        finally:
            # Close on every path; a failing query used to leave both open.
            if cursor is not None:
                cursor.close()
            if conn is not None:
                conn.close()

    return None
Testing load_known_recordings()...") + recordings = data_loader.load_known_recordings() + print(f" Type: {type(recordings)}") + print(f" Keys: {list(recordings.keys())[:3]}...") + + # Test a specific key + if recordings: + first_key = list(recordings.keys())[0] + print(f" First key: {first_key} (type: {type(first_key)})") + print(f" First value: {recordings[first_key]} (type: {type(recordings[first_key])})") + + except Exception as e: + print(f"❌ Error: {e}") + import traceback + traceback.print_exc() + +if __name__ == "__main__": + test_data_loader() \ No newline at end of file diff --git a/test_db_connection.py b/src/tests/unit/test_db_connection.py similarity index 89% rename from test_db_connection.py rename to src/tests/unit/test_db_connection.py index 0bf178e..09479c1 100644 --- a/test_db_connection.py +++ b/src/tests/unit/test_db_connection.py @@ -11,18 +11,22 @@ def test_db_connection(): """Test database connection.""" try: # Try different connection methods - hosts = ["localhost", "172.18.0.5", "musicbrainz-docker-db-1"] + hosts = [ + ("localhost", "localhost"), + ("172.18.0.2", "Docker container IP"), + ("db", "Docker service name") + ] - for host in hosts: - print(f"Trying to connect to {host}...") + for host, description in hosts: + print(f"Trying to connect to {host} ({description})...") try: conn = psycopg2.connect( host=host, port=5432, - database="musicbrainz", + database="musicbrainz_db", # Updated database name user="musicbrainz", password="musicbrainz", - connect_timeout=5 + connect_timeout=10 ) print(f"✅ Successfully connected to {host}") diff --git a/src/tests/unit/test_docker_db.py b/src/tests/unit/test_docker_db.py new file mode 100644 index 0000000..2e36b45 --- /dev/null +++ b/src/tests/unit/test_docker_db.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 +""" +Test script to verify database connection from inside Docker container. 
def test_fuzzy_search(cursor):
    """Test fuzzy search functionality.

    For each sample artist an exact-name query is tried first.  When that
    finds nothing, the name is compared with ``fuzz.ratio`` against the
    names returned by ``SELECT name FROM artist LIMIT 1000`` and the best
    candidate is reported (a score of 80 or more counts as a match).

    Args:
        cursor: Open database cursor returning tuple-style rows.
    """
    print("\n🔍 Testing fuzzy search...")

    test_artists = [
        "The Beatles",
        "Queen",
        "Pink Floyd",
        "Coldplay",
        "AC/DC",
        "Bruno Mars",
        "Taylor Swift"
    ]

    # The candidate list does not depend on the artist being searched, so it
    # is fetched at most once (lazily) instead of once per unmatched artist.
    candidates = None

    for artist in test_artists:
        print(f"\nSearching for: {artist}")

        # First try exact match
        cursor.execute("SELECT name FROM artist WHERE name = %s", (artist,))
        exact_matches = cursor.fetchall()

        if exact_matches:
            print(f" ✅ Exact match found: {exact_matches[0][0]}")
            continue

        # If no exact match, try fuzzy search
        if candidates is None:
            cursor.execute("SELECT name FROM artist LIMIT 1000")
            candidates = [row[0] for row in cursor.fetchall()]

        # Find best match
        best_match = None
        best_score = 0
        wanted = artist.lower()

        for db_artist in candidates:
            score = fuzz.ratio(wanted, db_artist.lower())
            if score > best_score:
                best_score = score
                best_match = db_artist

        if best_score >= 80:
            print(f" ✅ Found: {best_match} (score: {best_score})")
        else:
            print(f" ❌ No good match found (best: {best_match} with score {best_score})")
print("=" * 60) + + success = test_db_connection() + + if success: + print("\n✅ Database connection and fuzzy search test completed!") + else: + print("\n❌ Database connection test failed") + sys.exit(1) \ No newline at end of file diff --git a/src/tests/unit/test_eazy_e.py b/src/tests/unit/test_eazy_e.py new file mode 100644 index 0000000..cac5c61 --- /dev/null +++ b/src/tests/unit/test_eazy_e.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 +""" +Test specifically for Eazy-E to see why it's matching to Sleazy-E. +""" + +import psycopg2 +import psycopg2.extras + +# Database configuration +DB_HOST = "172.18.0.2" +DB_PORT = 5432 +DB_NAME = "musicbrainz_db" +DB_USER = "musicbrainz" +DB_PASSWORD = "musicbrainz" + +def test_eazy_e(): + """Test Eazy-E specifically.""" + print("🔍 Testing Eazy-E specifically...") + + try: + connection = psycopg2.connect( + host=DB_HOST, + port=DB_PORT, + database=DB_NAME, + user=DB_USER, + password=DB_PASSWORD, + connect_timeout=10 + ) + cursor = connection.cursor(cursor_factory=psycopg2.extras.RealDictCursor) + + print("✅ Connected to database") + + # Test 1: Search for Eazy-E + print("\n1. Searching for 'Eazy-E'...") + cursor.execute("SELECT name, gid FROM artist WHERE name = 'Eazy-E' LIMIT 5") + eazy_e_results = cursor.fetchall() + print(f" Found {len(eazy_e_results)} exact matches:") + for result in eazy_e_results: + print(f" {result['name']} (MBID: {result['gid']})") + + # Test 2: Search for Sleazy-E + print("\n2. Searching for 'Sleazy-E'...") + cursor.execute("SELECT name, gid FROM artist WHERE name = 'Sleazy-E' LIMIT 5") + sleazy_e_results = cursor.fetchall() + print(f" Found {len(sleazy_e_results)} exact matches:") + for result in sleazy_e_results: + print(f" {result['name']} (MBID: {result['gid']})") + + # Test 3: Fuzzy search for Eazy-E + print("\n3. 
Fuzzy search for 'Eazy-E'...") + cursor.execute("SELECT name, gid FROM artist WHERE name ILIKE '%Eazy-E%' LIMIT 10") + fuzzy_results = cursor.fetchall() + print(f" Found {len(fuzzy_results)} fuzzy matches:") + for result in fuzzy_results: + print(f" {result['name']} (MBID: {result['gid']})") + + # Test 4: Search for Boyz n The Hood + print("\n4. Searching for 'Boyz n The Hood'...") + cursor.execute("SELECT name, gid FROM recording WHERE name ILIKE '%Boyz n The Hood%' LIMIT 5") + recording_results = cursor.fetchall() + print(f" Found {len(recording_results)} recordings:") + for result in recording_results: + print(f" {result['name']} (MBID: {result['gid']})") + + # Get artist for this recording + cursor.execute(""" + SELECT a.name, a.gid + FROM recording r + JOIN artist_credit_name acn ON r.artist_credit = acn.artist_credit + JOIN artist a ON acn.artist = a.id + WHERE r.gid = %s + LIMIT 1 + """, (result['gid'],)) + artist_result = cursor.fetchone() + if artist_result: + print(f" Artist: {artist_result['name']} (MBID: {artist_result['gid']})") + + cursor.close() + connection.close() + + except Exception as e: + print(f"❌ Error: {e}") + import traceback + traceback.print_exc() + +if __name__ == "__main__": + test_eazy_e() \ No newline at end of file diff --git a/src/tests/unit/test_find_artist.py b/src/tests/unit/test_find_artist.py new file mode 100644 index 0000000..b80bee4 --- /dev/null +++ b/src/tests/unit/test_find_artist.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +""" +Test the find_artist_mbid method. +""" + +import sys +import os +sys.path.append('/app/src') + +# Add the src directory to Python path +sys.path.insert(0, '/app/src') + +def test_find_artist(): + """Test the find_artist_mbid method.""" + print("🔍 Testing find_artist_mbid method...") + + try: + # Import the cleaner + from cli.main import MusicBrainzCleaner + + # Initialize cleaner + cleaner = MusicBrainzCleaner(use_database=True) + + # Test with AC/DC + print("1. 
Testing find_artist_mbid with 'AC/DC'...") + artist_mbid = cleaner.find_artist_mbid('AC/DC') + print(f" Result: {artist_mbid}") + print(f" Type: {type(artist_mbid)}") + + if artist_mbid: + print(f" Is string: {isinstance(artist_mbid, str)}") + print(f" Length: {len(artist_mbid)}") + + # Test if we can call .lower() on it + try: + lower_mbid = artist_mbid.lower() + print(f" Lower: {lower_mbid}") + except Exception as e: + print(f" Error calling .lower(): {e}") + + # Test with a collaboration + print("\n2. Testing find_artist_mbid with 'Afrojack ft. Chris Brown'...") + collab_mbid = cleaner.find_artist_mbid('Afrojack ft. Chris Brown') + print(f" Result: {collab_mbid}") + print(f" Type: {type(collab_mbid)}") + + if collab_mbid: + print(f" Is string: {isinstance(collab_mbid, str)}") + + # Test if we can call .lower() on it + try: + lower_mbid = collab_mbid.lower() + print(f" Lower: {lower_mbid}") + except Exception as e: + print(f" Error calling .lower(): {e}") + + except Exception as e: + print(f"❌ Error: {e}") + import traceback + traceback.print_exc() + +if __name__ == "__main__": + test_find_artist() \ No newline at end of file diff --git a/src/tests/unit/test_fuzzy_eazy_e.py b/src/tests/unit/test_fuzzy_eazy_e.py new file mode 100644 index 0000000..07ad58f --- /dev/null +++ b/src/tests/unit/test_fuzzy_eazy_e.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +""" +Test the fuzzy search specifically for Eazy-E. 
+""" + +import psycopg2 +import psycopg2.extras +from fuzzywuzzy import fuzz + +# Database configuration +DB_HOST = "172.18.0.2" +DB_PORT = 5432 +DB_NAME = "musicbrainz_db" +DB_USER = "musicbrainz" +DB_PASSWORD = "musicbrainz" + +def test_fuzzy_eazy_e(): + """Test fuzzy search for Eazy-E.""" + print("🔍 Testing fuzzy search for Eazy-E...") + + try: + connection = psycopg2.connect( + host=DB_HOST, + port=DB_PORT, + database=DB_NAME, + user=DB_USER, + password=DB_PASSWORD, + connect_timeout=10 + ) + cursor = connection.cursor(cursor_factory=psycopg2.extras.RealDictCursor) + + print("✅ Connected to database") + + # Get all artists that might match Eazy-E + print("\n1. Getting potential matches for 'Eazy-E'...") + cursor.execute("SELECT name, gid FROM artist WHERE name ILIKE '%Eazy%' OR name ILIKE '%Sleazy%' LIMIT 20") + artists = cursor.fetchall() + + print(f" Found {len(artists)} potential matches:") + for artist in artists: + score = fuzz.ratio('Eazy-E'.lower(), artist['name'].lower()) + print(f" {artist['name']} (MBID: {artist['gid']}) - Score: {score}") + + # Find the best match + best_match = None + best_score = 0 + + for artist in artists: + score = fuzz.ratio('Eazy-E'.lower(), artist['name'].lower()) + if score > best_score: + best_score = score + best_match = artist + + print(f"\n2. Best match: {best_match['name']} (Score: {best_score})") + + # Check if there's an Eazy-E with different dash + print("\n3. 
Checking for Eazy-E with different dash character...") + cursor.execute("SELECT name, gid FROM artist WHERE name LIKE '%Eazy%E%' LIMIT 10") + eazy_variants = cursor.fetchall() + + print(f" Found {len(eazy_variants)} Eazy-E variants:") + for artist in eazy_variants: + print(f" '{artist['name']}' (MBID: {artist['gid']})") + # Show the exact characters + for i, char in enumerate(artist['name']): + if char in ['-', '‐', '–', '—']: + print(f" Character {i}: '{char}' (ord: {ord(char)})") + + cursor.close() + connection.close() + + except Exception as e: + print(f"❌ Error: {e}") + import traceback + traceback.print_exc() + +if __name__ == "__main__": + test_fuzzy_eazy_e() \ No newline at end of file diff --git a/src/tests/unit/test_hyphenated_artists.py b/src/tests/unit/test_hyphenated_artists.py new file mode 100644 index 0000000..f8d920b --- /dev/null +++ b/src/tests/unit/test_hyphenated_artists.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 +""" +Test to see what legitimate hyphenated artists exist in the database. +""" + +import psycopg2 +import psycopg2.extras + +# Database configuration +DB_HOST = "172.18.0.2" +DB_PORT = 5432 +DB_NAME = "musicbrainz_db" +DB_USER = "musicbrainz" +DB_PASSWORD = "musicbrainz" + +def test_hyphenated_artists(): + """Test hyphenated artists in the database.""" + print("🔍 Testing hyphenated artists in the database...") + + try: + connection = psycopg2.connect( + host=DB_HOST, + port=DB_PORT, + database=DB_NAME, + user=DB_USER, + password=DB_PASSWORD, + connect_timeout=10 + ) + cursor = connection.cursor(cursor_factory=psycopg2.extras.RealDictCursor) + + print("✅ Connected to database") + + # Find artists with hyphens + print("\n1. 
Artists with hyphens (first 20):") + cursor.execute("SELECT name, gid FROM artist WHERE name LIKE '%-%' LIMIT 20") + hyphenated = cursor.fetchall() + + for artist in hyphenated: + print(f" {artist['name']} (MBID: {artist['gid']})") + + # Test specific cases + test_cases = [ + "Eazy-E", + "AC-DC", + "D-A-D", + "E-40", + "M-1", + "T-Pain" + ] + + print(f"\n2. Testing specific hyphenated artists:") + for test_case in test_cases: + print(f"\n Testing: {test_case}") + + # Exact match + cursor.execute("SELECT name, gid FROM artist WHERE name = %s LIMIT 1", (test_case,)) + exact = cursor.fetchone() + if exact: + print(f" Exact match: {exact['name']} (MBID: {exact['gid']})") + else: + print(f" No exact match") + + # Case-insensitive match + cursor.execute("SELECT name, gid FROM artist WHERE LOWER(name) = LOWER(%s) LIMIT 1", (test_case,)) + case_insensitive = cursor.fetchone() + if case_insensitive: + print(f" Case-insensitive match: {case_insensitive['name']} (MBID: {case_insensitive['gid']})") + else: + print(f" No case-insensitive match") + + # ILIKE match + cursor.execute("SELECT name, gid FROM artist WHERE name ILIKE %s LIMIT 3", (f"%{test_case}%",)) + ilike_matches = cursor.fetchall() + print(f" ILIKE matches ({len(ilike_matches)}):") + for match in ilike_matches: + print(f" {match['name']} (MBID: {match['gid']})") + + cursor.close() + connection.close() + + except Exception as e: + print(f"❌ Error: {e}") + import traceback + traceback.print_exc() + +if __name__ == "__main__": + test_hyphenated_artists() \ No newline at end of file diff --git a/src/tests/unit/test_ilike_eazy_e.py b/src/tests/unit/test_ilike_eazy_e.py new file mode 100644 index 0000000..822fcc5 --- /dev/null +++ b/src/tests/unit/test_ilike_eazy_e.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 +""" +Test the ILIKE query that's causing the Eazy-E to match Sleazy-E. 
+""" + +import psycopg2 +import psycopg2.extras + +# Database configuration +DB_HOST = "172.18.0.2" +DB_PORT = 5432 +DB_NAME = "musicbrainz_db" +DB_USER = "musicbrainz" +DB_PASSWORD = "musicbrainz" + +def test_ilike_eazy_e(): + """Test the ILIKE query for Eazy-E.""" + print("🔍 Testing ILIKE query for Eazy-E...") + + try: + connection = psycopg2.connect( + host=DB_HOST, + port=DB_PORT, + database=DB_NAME, + user=DB_USER, + password=DB_PASSWORD, + connect_timeout=10 + ) + cursor = connection.cursor(cursor_factory=psycopg2.extras.RealDictCursor) + + print("✅ Connected to database") + + # Test the exact query from fuzzy_search_artist + main_artist = "Eazy-E" + query = "SELECT name, gid FROM artist WHERE name ILIKE %s LIMIT 50" + pattern = f"%{main_artist}%" + + print(f"\n1. Testing query: {query}") + print(f" Pattern: {pattern}") + + cursor.execute(query, (pattern,)) + artists = cursor.fetchall() + + print(f"\n2. Found {len(artists)} matches:") + for artist in artists: + print(f" {artist['name']} (MBID: {artist['gid']})") + + # Test fuzzy ratio on these results + print(f"\n3. Fuzzy ratio scores for 'Eazy-E':") + from fuzzywuzzy import fuzz + + best_match = None + best_score = 0 + + for artist in artists: + score = fuzz.ratio(main_artist.lower(), artist['name'].lower()) + print(f" {artist['name']} - Score: {score}") + if score > best_score: + best_score = score + best_match = artist + + print(f"\n4. 
Best match: {best_match['name']} (Score: {best_score})") + + cursor.close() + connection.close() + + except Exception as e: + print(f"❌ Error: {e}") + import traceback + traceback.print_exc() + +if __name__ == "__main__": + test_ilike_eazy_e() \ No newline at end of file diff --git a/src/tests/unit/test_sclub7.json b/src/tests/unit/test_sclub7.json new file mode 100644 index 0000000..705b961 --- /dev/null +++ b/src/tests/unit/test_sclub7.json @@ -0,0 +1,10 @@ +[ + { + "artist": "S Club 7", + "disabled": false, + "favorite": false, + "guid": "test-sclub7", + "path": "test/path/sclub7.mp4", + "title": "Never Had a Dream Come True" + } +] \ No newline at end of file diff --git a/src/tests/unit/test_simple_db.py b/src/tests/unit/test_simple_db.py new file mode 100644 index 0000000..da579e0 --- /dev/null +++ b/src/tests/unit/test_simple_db.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python3 +""" +Simple database connection test for MusicBrainz. +""" + +import psycopg2 +import sys + +def test_simple_connection(): + """Test basic database connection.""" + try: + # Try to connect to the database + print("🔌 Testing database connection...") + + # Try different connection methods + hosts = ["127.0.0.1", "localhost", "172.18.0.2", "db"] + + for host in hosts: + print(f" Trying {host}...") + try: + conn = psycopg2.connect( + host=host, + port=5432, + database="musicbrainz", + user="musicbrainz", + password="musicbrainz", + connect_timeout=5 + ) + print(f" ✅ Connected to {host}") + + # Test basic query + cursor = conn.cursor() + cursor.execute("SELECT version();") + version = cursor.fetchone()[0] + print(f" 📊 PostgreSQL version: {version.split(',')[0]}") + + # Check if any tables exist + cursor.execute(""" + SELECT table_name + FROM information_schema.tables + WHERE table_schema = 'public' + LIMIT 5; + """) + tables = cursor.fetchall() + + if tables: + print(f" 📋 Found {len(tables)} tables:") + for table in tables: + print(f" - {table[0]}") + else: + print(" ⚠️ No tables found in 
database") + + conn.close() + return True + + except Exception as e: + print(f" ❌ Failed to connect to {host}: {e}") + continue + + except Exception as e: + print(f"❌ Database connection failed: {e}") + return False + +if __name__ == "__main__": + print("🧪 Simple MusicBrainz Database Connection Test") + print("=" * 50) + + success = test_simple_connection() + + if success: + print("\n✅ Database connection test completed!") + print("💡 The database is accessible but may be empty") + print("💡 Wait for MusicBrainz data import to complete") + else: + print("\n❌ Database connection test failed") + sys.exit(1) \ No newline at end of file