From c9b2e23e04f830da5c1aa445d3bf2b8ef41dc47b Mon Sep 17 00:00:00 2001 From: mbrucedogs Date: Sat, 26 Jul 2025 16:56:08 -0500 Subject: [PATCH] Signed-off-by: mbrucedogs --- PRD.md | 89 ++++++++++---- README.md | 141 ++++++++++++++++++++--- cli/__pycache__/matching.cpython-313.pyc | Bin 13423 -> 12970 bytes cli/__pycache__/report.cpython-313.pyc | Bin 37293 -> 37142 bytes cli/__pycache__/utils.cpython-313.pyc | Bin 7048 -> 9388 bytes cli/matching.py | 16 ++- cli/utils.py | 98 ++++++++++++++++ web/app.py | 32 +++-- 8 files changed, 324 insertions(+), 52 deletions(-) diff --git a/PRD.md b/PRD.md index 6459724..75f60c9 100644 --- a/PRD.md +++ b/PRD.md @@ -85,10 +85,14 @@ These principles are fundamental to the project's long-term success and must be - **Verbose per-song output:** Only for matches/duplicates (not every song) - **Verbosity configurable:** (via CLI flag or config) -### 4.3 Manual Review (Future Web UI) +### 4.3 Manual Review (Web UI) -- Table/grid view for ambiguous/complex cases -- Ability to preview media before making a selection +- **Interactive Web Interface**: Table/grid view for ambiguous/complex cases +- **Media Preview**: Ability to preview media before making a selection +- **Bulk Actions**: Select multiple items for batch operations +- **Real-time Filtering**: Search and filter capabilities +- **Responsive Design**: Works on desktop and mobile devices +- **Easy Startup**: Simple script (`start_web_ui.py`) with dependency checking --- @@ -108,27 +112,60 @@ These principles are fundamental to the project's long-term success and must be - **CLI Language:** Python - **Config:** JSON (channel priorities, settings) -- **Suggested Folder Structure:** -/data/ -allSongs.json -skipSongs.json -/config/ -config.json -/cli/ -main.py -matching.py -report.py -utils.py - -- (expandable for web UI later) +- **Current Folder Structure:** +``` +KaraokeMerge/ +├── data/ +│ ├── allSongs.json # Input: Your song library data +│ ├── skipSongs.json # Output: Generated skip list +│ └── reports/ # Detailed analysis reports +│ ├── analysis_data.json +│ ├── actionable_insights_report.txt +│ ├── channel_optimization_report.txt +│ ├── duplicate_pattern_report.txt +│ ├── enhanced_summary_report.txt +│ ├── skip_list_summary.txt +│ └── skip_songs_detailed.json +├── config/ +│ └── config.json # Configuration settings +├── cli/ +│ ├── main.py # Main CLI application +│ ├── matching.py # Song matching logic +│ ├── report.py # Report generation +│ └── utils.py # Utility functions +├── web/ # Web UI for manual review +│ ├── app.py # Flask web application +│ └── templates/ +│ └── index.html # Web interface template +├── start_web_ui.py # Web UI startup script +├── test_tool.py # Validation and testing script +├── requirements.txt # Python dependencies +├── .gitignore # Git ignore rules +├── PRD.md # Product Requirements Document +└── README.md # Project documentation +``` --- -## 7. Future Expansion: Web UI +## 7. Web UI Implementation -- Table/grid review, bulk actions -- Embedded player for media preview -- Config editor for channel priorities +### 7.1 Current Web UI Features +- **Interactive Table View**: Sortable, filterable grid of duplicate songs +- **Bulk Selection**: Select multiple items for batch operations +- **Search & Filter**: Real-time search across artists, titles, and paths +- **Responsive Design**: Mobile-friendly interface +- **Easy Startup**: Automated dependency checking and browser launch + +### 7.2 Web UI Architecture +- **Flask Backend**: Lightweight web server (`web/app.py`) +- **HTML Template**: Modern, responsive interface (`web/templates/index.html`) +- **Startup Script**: Dependency management and server startup (`start_web_ui.py`) + +### 7.3 Future Web UI Enhancements +- Embedded media player for audio/video preview +- Real-time configuration editing +- Advanced filtering and sorting options +- Export capabilities for manual selections --- @@ -200,11 +237,17 @@ The tool has been successfully implemented with the following components: - [x] Generate comprehensive skip list with metadata - [x] Optimize performance for large datasets (37,000+ songs) - [x] Add progress indicators and error handling +- [x] Generate detailed analysis reports (`--save-reports` functionality) +- [x] Create web UI for manual review of ambiguous cases +- [x] Add test tool for validation and debugging +- [x] Create startup script for web UI with dependency checking +- [x] Add comprehensive .gitignore file +- [x] Update documentation with required data file information #### 🎯 **Next Priority Items** -- [x] Generate detailed analysis reports (`--save-reports` functionality) - [ ] Analyze MP4 files without channel priorities to suggest new folder names -- [ ] Create web UI for manual review of ambiguous cases - [ ] Add support for additional file formats if needed - [ ] Implement batch processing capabilities -- [ ] Create integration scripts for karaoke software \ No newline at end of file +- [ ] Create integration scripts for karaoke software +- [ ] Add unit tests for core functionality +- [ ] Implement audio fingerprinting for better duplicate detection \ No newline at end of file diff --git a/README.md b/README.md index 3d706c8..f7ffbcf 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,15 @@ A powerful command-line tool for analyzing, deduplicating, and cleaning up large KaraokeMerge/ ├── data/ │ ├── allSongs.json # Input: Your song library data -│ └── skipSongs.json # Output: Generated skip list +│ ├── skipSongs.json # Output: Generated skip list +│ └── reports/ # Detailed analysis reports +│ ├── analysis_data.json +│ ├── actionable_insights_report.txt +│ ├── channel_optimization_report.txt +│ ├── duplicate_pattern_report.txt +│ ├── enhanced_summary_report.txt +│ ├── skip_list_summary.txt +│ └── skip_songs_detailed.json ├── config/ │ └── config.json # Configuration settings ├── cli/ @@ -28,6 +36,14 @@ KaraokeMerge/ │ ├── matching.py # Song matching logic │ ├── report.py # Report generation │ └── utils.py # Utility functions +├── web/ # Web UI for manual review +│ ├── app.py # Flask web application +│ └── templates/ +│ └── index.html # Web interface template +├── start_web_ui.py # Web UI startup script +├── test_tool.py # Validation and testing script +├── requirements.txt # Python dependencies +├── .gitignore # Git ignore rules ├── PRD.md # Product Requirements Document └── README.md # This file ``` @@ -39,6 +55,42 @@ KaraokeMerge/ - Python 3.7 or higher - Your karaoke song data in JSON format (see Data Format section) +### Required Data File + +**Important**: You need to provide your own `data/allSongs.json` file. This file is excluded from version control due to its large size and personal nature. + +**Sample `allSongs.json` format:** +```json +[ + { + "artist": "ACDC", + "title": "Shot In The Dark", + "path": "z://MP4\\ACDC - Shot In The Dark (Karaoke Version).mp4", + "guid": "8946008c-7acc-d187-60e6-5286e55ad502", + "disabled": false, + "favorite": false + }, + { + "artist": "Queen", + "title": "Bohemian Rhapsody", + "path": "z://MP4\\Sing King Karaoke\\Queen - Bohemian Rhapsody (Karaoke Version).mp4", + "guid": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", + "disabled": false, + "favorite": true + } +] +``` + +**Required fields:** +- `artist`: Song artist name +- `title`: Song title +- `path`: Full file path to the song file +- `guid`: Unique identifier for the song + +**Optional fields:** +- `disabled`: Boolean indicating if song is disabled (default: false) +- `favorite`: Boolean indicating if song is favorited (default: false) + ### Installation 1. Clone or download this repository @@ -59,7 +111,12 @@ python cli/main.py --dry-run # Save detailed reports python cli/main.py --save-reports -``` + +# Test the tool functionality +python test_tool.py + +# Start the web UI for manual review +python start_web_ui.py ### Command Line Options @@ -152,6 +209,37 @@ Edit `config/config.json` to customize the tool's behavior: } ``` +## 🌐 Web UI for Manual Review + +The project includes a web interface for interactive review of duplicate songs: + +### Starting the Web UI + +```bash +python start_web_ui.py +``` + +This script will: +- Check for required dependencies (Flask) +- Install missing dependencies automatically +- Validate required data files exist +- Start the web server +- Open your browser automatically + +### Web UI Features + +- **Interactive Table**: Sortable, filterable grid of duplicate songs +- **Bulk Selection**: Select multiple items for batch operations +- **Real-time Search**: Filter by artist, title, or file path +- **Responsive Design**: Works on desktop and mobile devices +- **Detailed Information**: View full metadata for each duplicate + +### Web UI Requirements + +- Flask web framework (automatically installed if missing) +- Generated skip list data (`data/skipSongs.json`) +- Configuration file (`config/config.json`) + ## 📈 Understanding the Output ### Summary Report @@ -232,14 +320,32 @@ The codebase is designed for easy expansion: - **Modular Design**: Separate modules for matching, reporting, and utilities - **Configuration-Driven**: Easy to modify behavior without code changes -- **Web UI Ready**: Structure supports future web interface development +- **Web UI Implementation**: Full web interface for manual review and bulk operations +- **Testing Framework**: Built-in test tool for validation and debugging +- **Dependency Management**: Automated setup and dependency checking + +### Testing and Validation + +Use the built-in test tool to validate your setup: + +```bash +python test_tool.py +``` + +This will: +- Test all module imports +- Validate configuration loading +- Test with a sample of your song data +- Verify report generation +- Provide feedback on any issues ### Adding New Features 1. **New File Formats**: Add extensions to `config.json` 2. **New Matching Rules**: Extend `SongMatcher` class in `matching.py` 3. **New Reports**: Add methods to `ReportGenerator` class -4. **Web UI**: Build on existing CLI structure +4. **Web UI Enhancements**: Extend `web/app.py` and `web/templates/index.html` +5. **Testing**: Add test cases to `test_tool.py` ## 🎯 Current Status @@ -251,6 +357,10 @@ The codebase is designed for easy expansion: - **Performance Optimization**: Handles large libraries (37,000+ songs) efficiently - **Enhanced Analysis & Reporting**: Comprehensive statistical analysis with actionable insights - **Pattern Analysis**: Skip list pattern analysis and channel optimization suggestions +- **Web UI**: Interactive web interface for manual review and bulk operations +- **Testing & Validation**: Test tool for functionality validation and debugging +- **Dependency Management**: Automated dependency checking and installation +- **Project Documentation**: Comprehensive .gitignore and updated documentation ### 🚀 **Ready for Use** The tool is production-ready and has successfully processed a large karaoke library: @@ -268,12 +378,15 @@ The tool is production-ready and has successfully processed a large karaoke libr - ✅ Add statistical insights and trends - ✅ Pattern analysis and channel optimization suggestions -### Phase 3: Web Interface -- Interactive table/grid for duplicate review -- Embedded media player for preview -- Bulk actions and manual overrides -- Real-time configuration editing -- Manual review interface for ambiguous cases +### Phase 3: Web Interface ✅ +- ✅ Interactive table/grid for duplicate review +- ✅ Bulk actions and manual overrides +- ✅ Real-time filtering and search +- ✅ Responsive design for mobile/desktop +- ✅ Easy startup with dependency checking +- [ ] Embedded media player for preview +- [ ] Real-time configuration editing +- [ ] Advanced export capabilities ### Phase 4: Advanced Features - Audio fingerprinting for better duplicate detection @@ -311,9 +424,11 @@ This project is open source. Feel free to use, modify, and distribute according ### Getting Help -1. Check the configuration with `python cli/main.py --show-config` -2. Run with `--verbose` for detailed output -3. Use `--dry-run` to test without generating files +1. **Test your setup**: Run `python test_tool.py` to validate everything is working +2. **Check configuration**: Use `python cli/main.py --show-config` to verify settings +3. **Verbose output**: Run with `--verbose` for detailed information +4. **Dry run**: Use `--dry-run` to test without generating files +5. **Web UI**: Start `python start_web_ui.py` for interactive review ## 📊 Performance & Results diff --git a/cli/__pycache__/matching.cpython-313.pyc b/cli/__pycache__/matching.cpython-313.pyc index e7fa632e669ec5734f9242484f615bc58eebb57b..288c5419db29c799c2aecbc40ffcc24143b4744b 100644 GIT binary patch delta 2042 zcmZ8idu&rx7{8~l+daCC?Y7F=ELz3w@` z?>k@r-FiM&aNlCd6>umk4{qJnJX}!9P7PNl49l(DIy_{x+xiU^!e)yoM2&*r5yB$R zcMB`4hcAE5MCfKs)~eXf43G9^x_i?pC7n$4MKiHJ*w0oO3*w1X-`Fj7kuiDfxPC(C zknlUIzCV`XHeB&i0-3-8yG(v2!70-Y<{Yz$kHkhzt|tdWGNEL6A=woW&~Y_i)f62V zh^6{eLtmmd!^>ew;cTyqv_1sY)X&p{1Bz-;(y4w`jPeY|QFs*`EBu(bAyCxG0%J;% zsAt}>JtZDy(mv^g&m_52ixQ$xOAr(~hsWdLZum)Bz}|y=>tb|TVZF`jmyuD{lU6Sh zI1mCN*MfAApp#%Z!Ar2o-fVKyvK~IL``8FvvL9w%2$lM!J18Czx)@H{*jThy2gBA9 zxKUbaLH@HNBA|4;?2!F}OQ6lPVrGWbHfu1VYkm^QFH{I!=4maa5{dT+UC#y#=-KWz zI4R5UwOqY4VC4DelFigyT&YG(XKbgMyFQv6j4k6l%>(-E9aocD?A;norDDma$ZB=G zAb){6r3YcZ@O$xdm*ku6U6buy*NZp91xMIA3l(FIP=PRB;11R6$LBIcIOVnJ-qAs@ zEXwx4P}w~=R-RKR(s^s}MKP5xFA`Z>o z70wF59cGX27YXhSOd+1f7f9{%bHB4o?F-?g*KXd2ryarU!dO-OETs9%dYCWfnBZC9uZd zCLY!NMEJ-*hneAff1n~KVhQJJaV+6HZO#kpwP^W4W2zuxfY2N(_$sAYzh>iYY>F70 z^nM{=fsV>=IyU2{_;!Mg1e{=iKtWJ*`UVG*iC*juyo!W$Fsw0hQcXM-#Yg%?0jsN| zwTsC{ONZ72^odjk3osMgrf|HO@KkNSM%!A1Kw-8T>GAUh1U0uOrewP5`71mD7pr_l zlsi>VEo&WFZG}Ip7Gf``s;)ELK!MT&o2m~sJunGnA$C=|8XtH5kh!@kKDjD>-O)cJ z{#N1`YPluJ6OOt`DR_3rb*bf2!=$wEXQ>5jH5Uw2PgK2Qw`)rDMMQ(j_F`RSZ~T6( zOQiemHU!}xS7$lz!VQ0g;8lc~0wuu~I8xinobY39xrw6k9{9U9Ba?gL52PfZ<7Ff! zGH~4uyXu^UT3gvd;=>3YI8zsp$$8~8(K+!%GS;2hH4x+L;7*~6-HB8@&G(S~UV@o~tGQZF&g#37tG9zSz7{f} z#PBx=C^2gO>oKk*(y6TF5x5p~Vw(O6b~4jg^W0Hp#TNGDF!ZM~{V(#keqEb~_Efgo ztyE(?nT}?(zN{L0(&;3xC*2MLY7m@WGS`aeEn1Dhv4(Oz+XP-&DC^F1ar-=DcMqJFsd>kE_QDb=XhLAtsD7b0}Xwp2raX2Nc5A{{R30 delta 2265 zcmZuyeQaCR6~EWdAF*S{X>8Ya{1Mx+O>8Gk(zp%KHchuSr9&;WCT-L%xNiKCcyVoW z-wWYmTZ%L$siAYD2xFG!N0@{Q>v#;^bfQTP3j^#X;AI!oaZL( zCa&~;_ucb#?>oP9&wEw=+jPqfhr`Bje75`P>FD7379YQO{;pDYg!x#E<*e;2XWM2V z1IC1B${1zec34=!#8`yoEW+Nw4kgaN{}MOD1kb{Ie5bW-vQ#PNCyGaHvXMiW*4d+E3hQqA~c-PVktrm$#mtJ;7_@EvFy6@r7Le{ez1F4=r@V>odbUWHUTKW-E zmPU~MB)E^@0KvE5DepFOm|lC~9d9T9DO~eD!=q60#d#krdOE>u@+9~+jwU#|8WStx zddhGD^W7L@tw+r8qAz2kQ(b0u|JG~=>C(2hGF3;y}nS}$ZgNN#ahgRAifFtd>Ep6y?#n--PfL$5r_=ZRNz|9dS zZj~u28Ee9a*t_wN1ix)BIG$r{)Q~e$9*>wPMR2Ce2*(1Sz}rDTw}LO2;L|V^{N=6n zr?}n@H-p1)Dd6w1lHYntBWx_2HN+{NE?=jr(4p^P{YfZ+0h6HtZiC~Ycf7~Y_Z5T2 z6}>PNI%VVxmQ+u87ykfe!jaJ^S#YrnC(TqVl!T(SC~BcnER?GiIe)q~@IPz3TPdTU z#0w|67?r9}HM9Jfiu|nCUksMR>9&5>7#*%@DMim%-TnH0DJGxE0cbH-*dPqnF0i;LZN0zQfk zw~ShI=6lB&E283H3Z&RKE%#%>O*so3jD`3(%*4hmpi7$IlUN3>N4>DA(-p9dI&#fK zAkKDuZq7+G=Nz!Rvv<=U8EeKnF_s-QWu-VvwZMGmujd~|1ECTeCpb(nOP~-uilAAG zv$b+*qM*tmLBb?L$|T5BGYh$(R4ca?ntevDsrg4`p_HnXluZngi%z26>!eae#e1a6 zrxk&z%CbA|ScOb86y4cqE-@1HA!xQ!vZCg3rzu60;m`5TR-#;!uo%QqvYLTFVkfr3 zu|$vg8VZz2IG1?(?rW~i7m`2Q_51MPW#i?eZ`t23u8xe?N5)se4?n;EGjI6$xvRd= za=5?l+qyWo;@ffgk-BgAcfK94H~AX2z%P>BQY$4!pcS4SfL? zyVIT9$$yw8g@T|e{~0uz4o31ymhJAv&cRAzf=E<(oc2$FPiFoXc+qs z6I9rGZE2>-au2VZ|~Bhzf4h0^x&YALoTyOkl-2h4#F&;d)UKqlb0mdH>?p+yR%G`AFxhk`=4 ziTL^ww`7|nxGb{FfV*fQW{#O{vP5PJ$XK_;J+k-*GwZ~~#68?Ob^mPVkKgy5-}gA* zJ->4=U0}`)Gwd?UDiB#4i~H+km)Wsg{GNC{_X7!>%}d4#xSXepRPc%z#2X-xmjDm) z^uV?$;gOETDPTzCH2kxA#HaFWawNQ(*GQ2|X;DID{tlc5)@>#@pRZz-YE+brke5No zJp+ap5@YlhRW#y_7M5p8>$PAgsKo2wwlM)t7pU-h@!f)I1|+8J>~b>Nk3GX&WwV#7 zx3=23lHFNcox^GOaOn zM?<$cA8wkTkw%lAuw)kSXX1e!vlxmBbJ#^y{Hc;RO8k1)ys8;)rf}kT;e15oPK=O8 z%Rqoj7B7{`Wq1^>@(Z|C9Nc4PuuEL7SdtL#?kfxUW4Ky1hzCUTzF{dI74KEA!}x-z zt{EUj;?O}J<2$fiUqucrbIdX+H^>meAORy5Hd7NNBv9&G3dIz@1GDoa{z;s3b}@!A z%EVFllEN&7ZwMGAPs{FsyZJ>N5vO%ic5lA z9=RcdacM15*xj{KLo_;y!s_t~8ObO6R#@Np4r5yZ=R0-qr#?!PPI$U(M9C5dLKZE* z5&WHrQSAsR5r<0pAg}8XejX;e46-bXu3QIkNqP;DI@_Y>Hxa2fTax%K#BK#uccmn! zJP86uHFS1sajtl_TZQp07&^Q<@)kKnc!FU1aD)5_@yR7^f-#d0Z0q#!i7yFWKN64E zh;JXs#dro5dImO(ldKUOl-pf4Pn)BqeuNRO5Gsh>9x^QAI6+H{aFud}U@g^!+uZ0? z<6W@Odp-TJG^5sbuvCx{#f3;J`~S;?D8jxB*N#p`(9VJ!I*z3(Xv1)%0!EIt#Xd*2 zp`nvk$UDvET0I7YfA)igA%Ljkib&1hlGPN zXLFlf5RTB)uPF2qFs^#4s-6;mfvw-9`<%GJXqUI?Rh!rCbktf~?4BTQ5Waw^{t6s` zMBjErH0kPDMG~vQ>PrN-PYUh6ng}`hL{FOdz0V<$7R}3xq2&0WB#>Z)yT_~6UbKxi zjWkW{{5r!lmtk7eB){dI)1+RE3uxBf(WEXb(aKG~Nsu=4VJvtDhGQEsTJ;k4M36(K zb7S^whqM#7aXdUau^O|WA56uEpxXpk@MFG4z5hPu@Q)z;Oau2R{6T@EH^PcL zPG;hQ9GPK4Gf_L0HBmpA971I4-@DJhzuI48_wTm{_SS{D#q9-C>Z#hP6_ceQ3CUS3 zD}P9Bs1?RfJ+U+<^iH8=*YnT0C88Qb^rhX delta 1436 zcmZ8geN0nV6u+mhg%(;$r4(BPA1Gi!#I1l@P(*OJXz}B%OopsfC=`bjUinY~1rfgh z#T+LlBhj&C(`6d+G45WN$>#gxch5b)bIv{Q zp5Hw+D7)~UO!HWy2_f{`zoyIGe_3oHeV_x6*(g=cutJYq`<fdA=w@#XlPR@ct58Y7UL~*Bx>JO$rloHs#LYQOcwM+zNEaO3P;wYC*{%e?zIo{ z#hymq?QC-K+dU0#-nPlYJ8LWWy!-;bvbx6Q#k4$wyv|uI;`+RPu;Qvbn!XeJEarqImc~BY(A6Ywz^DOSuhA1f6VDiv@U9l{-GmNH*+$-pBG6y4{j@9-OU;0tt=^g@sHpkz=Em!-b46oB`o z&z;>gNm^GW0Nlj^cS+FKtmFW`TNELMS6`DY_=+|p_EIo|n8_BE7|C2eF?f~1BtCfS z82l!+*0jpvt}u^|!M6-OYiL7y-YY3wgO zQF=D>Ze;R!Wb%vjV=bANF~t!>+O!@gj}C@=fW)i^ZxHQg)T*_sv9J3U=&`IP9>TD* zCjmO}e9w2wFS6EWw0xae|JU-&I}m?o#6t#88N{mhRcWNUM&o8G-hJsmT9&Ic45r%Cx#}VRT_r# zMNE4>^nY4`Z?P;4P%m~zBQ6CS<*xXkMOL4N#tZh?){t;m)8fkC; E0GuqLbpQYW diff --git a/cli/__pycache__/utils.cpython-313.pyc b/cli/__pycache__/utils.cpython-313.pyc index c6a10869b8e7c98969120d1866577e40dbc6ea06..b7077c4b7c68b1119e538758e4458fe129466e38 100644 GIT binary patch delta 3071 zcmcgue@q+K9e-#0?(91o8;r5}ZHj@oYjG1mv}9>$5)!(GByhQe1uxFUHpU&Cy>~`S zSsSya*&s*@=(U|wsVey|Q8Ou0{)whmP4>ssO_~HlX{^&Gk!k-#+9XiOq-9#O?>mFT zD3c~l+LP{c?|r}D_r33X@B96{o9evrddVY)LqO2>9l1F2QPW(Bk9lvdWss?vn|z0< zo%_0$sUtN^lY!NerwYAkd6U5&sUj>&T-X{GS(w{yk-SucD5E5Kg` zlR1f@-a6JLDnDfYk2Bu_eUYqrr81TX(ru*hv_Bf z1+U10ms-Qidt_c{JZ#L|WM9AjHAXhcW{GVz2T+ikcvm=tQ{!pBmQoX%KdEL?{=Q>v z{n+k<98#Fmzz89xtt{33wPDd}`v zjRi&AN@Z;n+9`C9zljG!B6R)F8ur3iFBwj%u?V=07+=S)f{<>jwnozS3NmBAZ(Zdr zc5Ve(ZjCwq!H9c7iMuJhMl@%T?S<|Ma@Bd4&6(ij$!Csw@)OtE0T~U@4{m9ad1G@_ zcF3`zl-Ug_Z0B)=#=%oM0f+a6qb4g^=+VYN)K?9=&kZ@hIZa|;gw*dUTek~QJCK3l zcGlERal?6^DA*XMZIw9Lx*O6^vA}GrX7Zx|BXIh_F`;N8y2U4lbL--DQ`1RALPao29diYQ11+91owX;HsXg^G5MsN=u za&axHq~kiL#T6VK(OE4$n#}07xH>)-$4Vxyn@8g+$R+ zOj5Vdo^Dg)=Q5B-&~*UD)l6hKH5!X!T})s_jTL0tvpgiBvS#rtxJ{;Vbd2LSq%H254PceDXdAV!4Yv$Xzj(ehizW;{%n*3?iP33ys zy{67(r)zfS^!WQ)u7gZ0IUDjd4fD#?rYnbMZOiV8Tyn|XlCKW@zC7@ty6J(pVbzPu z18X(NDo%A=n3(Oqkj?vQbJ??3Y*UV9(RW|0TNLY-;43{#VleNnT^37b$Ckv#yxYHE z_d{GR_iV?^M6Q1(o3Cq}-*+WBE9T)l-(PYE^VQ7@<;}~Y69YY=C_RpiMw5gd%A?bO zA0Vg`NRela{LbMtnPhU&Rc}(w@Kilv4ku`wr@_8EL%$m@}8x6pI|ql*d-O2bR&wGk>4`2bqrA)q*PBXvwoAz z(J(4DiX2GZxTt6l1102YNqO%e?pWWWq9Yjh9e{;#g;F#saQo$MFvj|Fvrv?q8RNfP zu55uVfQYu+oFZei7i`uU!?rzR2(V{@5$|^UEEXBVEZH|R#;HQw24e`4Z3|;;=gLuJ z46{^XxT5p^oiX^Jb7C)T*b(lhFaWSlHuwY;NEA+HDZ;rRgLg0- z8p1NbW@^FTrP45k?@>5S;S_}-3K0rtC@27%h{U+n3Kv>4a1d6anaJjqMxrB1R#oGp zkzt$~1BwFn8wM92Q#bEI3QwZt(yE`=&($xLJ~#QDyxp^jfmS$VE&XZs$Jr&`cb{)q zA+jXCBTjbmR?DfxkzVfzAADVhC?{r_Dn4Xw!;%vt&s=t?~(?QE@;V#ctlmk;+k>ly-GvsV#OTfs+AafA^ z2xP%ZosUFfsc0mET`Jzt}dgMzyP_$>XZVzBa~n&Ao!)MGfx;jqbw9$ikWE;>p-!s#qOm_AsuS(G>)* hFEb0rPyXgDqrVUvT^LI*#(waaYF4^UWKZ=Q{{m~*sZRg^ delta 1173 zcmZ`&PfQe77=LefW_EXG7q;%gBC;_2a~VrPWDT@Mq>6y480=(pl}*=?9oT7>+0C29 zS`s3cCQT1b$t$+K>A^_U7!oc`F9i;b$K_@wz0|}*PhC>Is4;$T785-5CExen`~80N zd*6KX=Hc1z25SBbg+v77kCWf#{*A8GG~h2+5-EIWWqAoVt-OijIBmk6XY10EDy02s zi3e?xQ|lYiR~)HRW9bm!5+DsOQm-CN`_fW6kn}Jma0h^Ebh+4^M`_+z$A9{6{)7=) zMZL(v5FQ9G1a}l!JcOX32%4Be$ns5tt~xZIikai_J;d@)d;gaUXnxi6NeUrdKq$4K z1ri{g{173)y?{rm#PvO5fDx1vI|qF4>}53h1ws;%k;X$Yps)g|Tc3iB-UODHSt*v7 zGTm9CSH)Q5L9b`u(;eW+(Xv>FFVov%OcKB)?z*hfZLxjAeR7|^W$!esc{H!aseBLD zffQl!2XUMf7K*Z6u;y&pv`Pi}^yql+Sxzb(y(cAxoSOIbikBP#J^2WdBk0AVVP%yd>0wwOOUGF1XDLZv zNMmP$u$1==5(B_FFl$;_c76s9%((TD6F{)`+pi%lql$<^waaIo)Ww$1)9b;%r6-MD zy8%8ZR1m+B3BAJ7FzCohmPV*itMKEnOi^09#n;2PU!{*iE%ZkCxd6AxyLgU@2b*X( z(w966vhxa0x?pzvP`~}PS;~=7wiOI}MlTwUU>iD_$$`obFqxpgM-<*>-;j1j4@+?6 zkO^Vu5WN_k<$@>}-q=3$7mm6vyj{$jB{BvI#YZmC^>`m1rcdMD{1t$fHX_a2c*ODM z4GY>i4Ui++=Ax0Y^m)T}uxYak3q7m$Dm+^t^Li;WYvzp%(-8ycUujIfi3jP=^7lfu z5j4^I4FGpqPkHVm`mFV1*tE1A7mA?1hv=2IMD0HwSSxsp`Zoq>zb8NGXr?zXpget^Sk gkqUxovt!fVQru int: - """Get channel priority for MP4 files based on configured folder names.""" - if not file_path.lower().endswith('.mp4'): - return -1 # Not an MP4 file - - channel = extract_channel_from_path(file_path, self.channel_priorities) + """Get channel priority for files based on configured folder names.""" + channel = extract_consolidated_channel_from_path(file_path, self.channel_priorities) if not channel: return len(self.channel_priorities) # Lowest priority if no channel found @@ -235,10 +233,10 @@ class SongMatcher: ext = get_file_extension(song['path']) stats['file_type_breakdown'][ext] += 1 - if ext == '.mp4': - channel = extract_channel_from_path(song['path'], self.channel_priorities) - if channel: - stats['channel_breakdown'][channel] += 1 + # Use consolidated channel extraction for all file types + channel = extract_consolidated_channel_from_path(song['path'], self.channel_priorities) + if channel: + stats['channel_breakdown'][channel] += 1 # Select best song and mark others for skipping best_song, group_skips = self.select_best_song(group_songs) diff --git a/cli/utils.py b/cli/utils.py index 3ede021..5e28c8b 100644 --- a/cli/utils.py +++ b/cli/utils.py @@ -117,6 +117,104 @@ def extract_channel_from_path(file_path: str, channel_priorities: List[str] = No if re.search(escaped_channel, path_lower): return channel + # If no configured channel found, extract the parent directory name + # Split path and look for the folder containing the file + parts = file_path.split('\\') + + # Look for MP4 folder structure: MP4/ChannelName/song.mp4 + for i, part in enumerate(parts): + if part.lower() == 'mp4' and i < len(parts) - 1: + # If MP4 is found, return the next folder (the actual channel) + if i + 1 < len(parts): + next_part = parts[i + 1] + # Check if the next part is a folder (no file extension) or a file + if '.' not in next_part: + return next_part # It's a folder, so it's the channel name + else: + return 'MP4 Root' # File is directly in MP4 folder + else: + return 'MP4 Root' + + # If no specific channel found, return the folder containing the file + if len(parts) >= 2: + parent_folder = parts[-2] # Second to last part (folder containing the file) + # If parent folder is MP4, then file is in root + if parent_folder.lower() == 'mp4': + return 'MP4 Root' + # Make sure we're returning a folder name, not a filename + if '.' not in parent_folder: + return parent_folder + else: + # If parent is also a file, go up one more level + if len(parts) >= 3: + grandparent = parts[-3] + if '.' not in grandparent: + return grandparent + + return None + + +def extract_consolidated_channel_from_path(file_path: str, channel_priorities: List[str] = None) -> Optional[str]: + """Extract channel information with consolidated MP3/CDG handling.""" + path_lower = file_path.lower() + parts = file_path.split('\\') + + # Handle MP3/CDG files - consolidate all CDG folder content under "MP3" + if file_path.lower().endswith(('.mp3', '.cdg')): + # Look for CDG folder structure: CDG/ParentFolder/SubFolder/file.mp3 + for i, part in enumerate(parts): + if part.lower() == 'cdg' and i < len(parts) - 1: + # If CDG is found, return "MP3" as the consolidated channel + return 'MP3' + + # If not in CDG folder but still MP3/CDG, return "MP3" + return 'MP3' + + # Handle MP4 files - keep existing channel logic + if not file_path.lower().endswith('.mp4'): + return None + + if not channel_priorities: + return None + + # Look for configured channel priority folder names in the path + for channel in channel_priorities: + # Escape special regex characters in the channel name + escaped_channel = re.escape(channel.lower()) + if re.search(escaped_channel, path_lower): + return channel + + # If no configured channel found, extract the parent directory name + # Look for MP4 folder structure: MP4/ChannelName/song.mp4 + for i, part in enumerate(parts): + if part.lower() == 'mp4' and i < len(parts) - 1: + # If MP4 is found, return the next folder (the actual channel) + if i + 1 < len(parts): + next_part = parts[i + 1] + # Check if the next part is a folder (no file extension) or a file + if '.' not in next_part: + return next_part # It's a folder, so it's the channel name + else: + return 'MP4 Root' # File is directly in MP4 folder + else: + return 'MP4 Root' + + # If no specific channel found, return the folder containing the file + if len(parts) >= 2: + parent_folder = parts[-2] # Second to last part (folder containing the file) + # If parent folder is MP4, then file is in root + if parent_folder.lower() == 'mp4': + return 'MP4 Root' + # Make sure we're returning a folder name, not a filename + if '.' not in parent_folder: + return parent_folder + else: + # If parent is also a file, go up one more level + if len(parts) >= 3: + grandparent = parts[-3] + if '.' not in grandparent: + return grandparent + return None diff --git a/web/app.py b/web/app.py index 5fccee3..0669f23 100644 --- a/web/app.py +++ b/web/app.py @@ -70,12 +70,22 @@ def get_file_type(path: str) -> str: return 'Unknown' def extract_channel(path: str) -> str: - """Extract channel name from path.""" + """Extract channel name from path with consolidated MP3/CDG handling.""" path_lower = path.lower() - - # Split path into parts parts = path.split('\\') + # Handle MP3/CDG files - consolidate all CDG folder content under "MP3" + if path.lower().endswith(('.mp3', '.cdg')): + # Look for CDG folder structure: CDG/ParentFolder/SubFolder/file.mp3 + for i, part in enumerate(parts): + if part.lower() == 'cdg' and i < len(parts) - 1: + # If CDG is found, return "MP3" as the consolidated channel + return 'MP3' + + # If not in CDG folder but still MP3/CDG, return "MP3" + return 'MP3' + + # Handle MP4 files - keep existing channel logic # Look for specific known channels first known_channels = ['Sing King Karaoke', 'KaraFun Karaoke', 'Stingray Karaoke'] for channel in known_channels: @@ -88,9 +98,9 @@ def extract_channel(path: str) -> str: # If MP4 is found, return the next folder (the actual channel) if i + 1 < len(parts): next_part = parts[i + 1] - # Skip if the next part is the filename (no extension means it's a folder) + # Check if the next part is a folder (no file extension) or a file if '.' not in next_part: - return next_part + return next_part # It's a folder, so it's the channel name else: return 'MP4 Root' # File is directly in MP4 folder else: @@ -98,7 +108,7 @@ def extract_channel(path: str) -> str: # Look for any folder that contains 'karaoke' (fallback) for part in parts: - if 'karaoke' in part.lower(): + if 'karaoke' in part.lower() and '.' not in part: return part # If no specific channel found, return the folder containing the file @@ -107,7 +117,15 @@ def extract_channel(path: str) -> str: # If parent folder is MP4, then file is in root if parent_folder.lower() == 'mp4': return 'MP4 Root' - return parent_folder + # Make sure we're returning a folder name, not a filename + if '.' not in parent_folder: + return parent_folder + else: + # If parent is also a file, go up one more level + if len(parts) >= 3: + grandparent = parts[-3] + if '.' not in grandparent: + return grandparent return 'Unknown'