From cb1e1985c3c0cc4206819b73f1a92b7637e9f5d3 Mon Sep 17 00:00:00 2001 From: mbrucedogs Date: Mon, 4 Aug 2025 16:01:31 -0500 Subject: [PATCH] Signed-off-by: mbrucedogs --- BillboardPlaylistUpdater/Program.cs | 593 ++++++++++++++++------------ 1 file changed, 350 insertions(+), 243 deletions(-) diff --git a/BillboardPlaylistUpdater/Program.cs b/BillboardPlaylistUpdater/Program.cs index db9460f..c51c8da 100644 --- a/BillboardPlaylistUpdater/Program.cs +++ b/BillboardPlaylistUpdater/Program.cs @@ -24,7 +24,7 @@ namespace BillboardPlaylistUpdater static void Main(string[] args) { - //args = new string[] { "mbrucedogstest" }; + //args = new string[] { "mbrucedogs" }; if (args.Length != 1) { Console.WriteLine("usage: songcrawler partyid songspath"); @@ -49,279 +49,386 @@ namespace BillboardPlaylistUpdater else songList = new List(); - //RunTest(); - //update Shared Charts and save - UpdateCurrentCharts(); - client.Set(firepath, songList); + ////RunTest(); - //update Controller Charts for Local Search and save + //// TEST MODE: Only process first list and don't save back to Firebase + //Console.WriteLine("TEST MODE: Processing only first list..."); + //if (songList != null && songList.Count > 0) + //{ + // var firstList = songList[0]; + // Console.WriteLine("********************************************************"); + // Console.WriteLine(string.Format("Matching Controllers Songs for {0}", firstList.Title)); + // Console.WriteLine("********************************************************"); + // Search(firstList); + + // // Show results summary + // Console.WriteLine("\n=== TEST RESULTS SUMMARY ==="); + // int totalMatches = 0; + // foreach (var song in firstList.Songs) + // { + // Console.WriteLine($"{song.Title} - {song.Artist}: {song.FoundSongs.Count} matches"); + // totalMatches += song.FoundSongs.Count; + // } + // Console.WriteLine($"Total matches across all songs: {totalMatches}"); + //} + //else + //{ + // Console.WriteLine("No song lists found to test."); + //} + + // Commented out Firebase saves for testing + client.Set(firepath, songList); client.Set(string.Format("controllers/{0}/songList", controller), songList); UpdateSearchLists(); client.Set(string.Format("controllers/{0}/songList", controller), songList); } - static void RunTest() - { - var testArtist = "Linkin Park Featuring Kiiara".RemoveCrap().ToLower(); - var testTitle = "Heavy"; - var psongs = songs.Where(s => s.Title.Contains(testTitle)).ToList(); - foreach (var item in psongs) - { - var ia = item.Artist.RemoveCrap(); - var it = item.Title.RemoveCrap(); - var artist = DoesMatch(ia, testArtist); - var title = DoesMatch(it, testTitle); - } - } - - static void UpdateCurrentCharts() - { - SongList hot100 = Download("Hot 100", "https://www.billboard.com/charts/hot-100"); - SongList pop = Download("Pop-Songs", "https://www.billboard.com/charts/pop-songs"); - SongList rock = Download("Rock-Songs", "https://www.billboard.com/charts/rock-songs"); - SongList country = Download("Country-Songs", "https://www.billboard.com/charts/country-songs"); - SongList hiphop = Download("R-B-Hip-Hop-Songs", "https://www.billboard.com/charts/r-b-hip-hop-songs"); - List localSongList = new List(); - localSongList.Add(pop); - localSongList.Add(rock); - localSongList.Add(country); - localSongList.Add(hiphop); - localSongList.Add(hot100); - - foreach (SongList sl in localSongList) - { - try - { - Console.WriteLine(string.Format("Checking for {0}", sl.Title)); - var found = songList.Where(s => s.Title.ToLower() == sl.Title.ToLower()); - if (found != null) - { - var items = found.ToList(); - foreach (var item in items) - { - songList.Remove(item); - } - } - songList.Add(sl); - - } - catch (Exception ex) - { - Console.WriteLine(ex.Message); - } - } - - songList = songList.OrderByDescending(l => l.Title).ToList(); - } - static void UpdateSearchLists() { - //update the controller SongLists - foreach (var list in songList) + //update the controller SongLists using parallel processing + Console.WriteLine($"Processing {songList.Count} lists in parallel..."); + + var options = new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount }; + + Parallel.ForEach(songList, options, list => { - Console.WriteLine("********************************************************"); - Console.WriteLine(string.Format("Matching Controllers Songs for {0}", list.Title)); - Console.WriteLine("********************************************************"); + Console.WriteLine($"********************************************************"); + Console.WriteLine($"Matching Controllers Songs for {list.Title}"); + Console.WriteLine($"********************************************************"); Search(list); - } + }); } static void Search(SongList list) { - foreach (var song in list.Songs) - { - song.FoundSongs.Clear(); - var bA = song.Artist.RemoveCrap().ToLower(); - var bT = song.Title.RemoveCrap().ToLower(); + if (list.Songs == null || songs == null) + return; - foreach (var item in songs) + // Pre-filter disabled songs once + var availableSongs = songs.Where(s => !s.Disabled).ToList(); + Console.WriteLine($"Searching through {availableSongs.Count} available songs for {list.Songs.Count} songs in list..."); + + // Build fast lookup indexes + var titleIndex = BuildTitleIndex(availableSongs); + var artistIndex = BuildArtistIndex(availableSongs); + + // Process each song in the list sequentially + foreach (var songListItem in list.Songs) + { + Console.WriteLine($"Searching for: {songListItem.Title} - {songListItem.Artist}"); + + var songMatches = new List<(Song song, double score)>(); + + // Get candidate matches using indexes + var titleCandidates = GetTitleCandidates(songListItem.Title, titleIndex); + var artistCandidates = GetArtistCandidates(songListItem.Artist, artistIndex); + + // Combine and deduplicate candidates + var allCandidates = titleCandidates.Union(artistCandidates).Distinct().ToList(); + + Console.WriteLine($" Found {allCandidates.Count} candidates to evaluate..."); + + // Evaluate only the candidates (much faster than checking all songs) + foreach (var song in allCandidates) { - if (item.Artist != null && item.Title != null) + // Calculate similarity scores for title and artist + double titleSimilarity = CalculateSimilarity(songListItem.Title, song.Title); + double artistSimilarity = CalculateSimilarity(songListItem.Artist, song.Artist); + + // Combined score (weighted average - title is more important) + double combinedScore = (titleSimilarity * 0.7) + (artistSimilarity * 0.3); + + // If combined score is above threshold, consider it a match + if (combinedScore >= 0.85) // Adjustable threshold { - var t = item.Title.RemoveCrap().ToLower(); - var a = item.Artist.RemoveCrap().ToLower(); - bool titleMatch = DoesMatch(bT, t); - if (titleMatch && DoesMatch(bA, a)) + songMatches.Add((song, combinedScore)); + } + } + + // Sort matches by relevance and file type priority + songListItem.FoundSongs = songMatches + .OrderByDescending(x => x.score) + .ThenBy(x => GetFileTypePriority(x.song)) + .ThenBy(x => GetChannelPriority(x.song)) + .Take(10) // Limit to top 10 matches + .Select(x => x.song) + .ToList(); + + Console.WriteLine($" Total matches found: {songListItem.FoundSongs.Count}"); + } + } + + static double CalculateSimilarity(string str1, string str2) + { + if (string.IsNullOrEmpty(str1) || string.IsNullOrEmpty(str2)) + return 0.0; + + // Normalize strings for comparison + str1 = NormalizeString(str1); + str2 = NormalizeString(str2); + + // Use multiple fuzzy string algorithms and combine results + double diceCoeff = str1.DiceCoefficient(str2); + double levenshtein = 1.0 - (str1.LevenshteinDistance(str2) / (double)Math.Max(str1.Length, str2.Length)); + + // Use Longest Common Subsequence as a third algorithm + double lcsScore = CalculateLCSSimilarity(str1, str2); + + // Weighted average of different algorithms + return (diceCoeff * 0.4) + (levenshtein * 0.3) + (lcsScore * 0.3); + } + + static double CalculateCombinedSimilarity(SongListSong songListItem, Song song) + { + double titleSimilarity = CalculateSimilarity(songListItem.Title, song.Title); + double artistSimilarity = CalculateSimilarity(songListItem.Artist, song.Artist); + + // Weight title more heavily than artist + return (titleSimilarity * 0.7) + (artistSimilarity * 0.3); + } + + static string NormalizeString(string input) + { + if (string.IsNullOrEmpty(input)) + return string.Empty; + + // Remove common karaoke prefixes/suffixes + var normalized = input.ToLowerInvariant(); + + // Remove common karaoke indicators + normalized = Regex.Replace(normalized, @"\b(karaoke|karaoke version|instrumental|backing track)\b", "", RegexOptions.IgnoreCase); + + // Remove extra whitespace and punctuation + normalized = Regex.Replace(normalized, @"\s+", " "); + normalized = Regex.Replace(normalized, @"[^\w\s]", ""); + + return normalized.Trim(); + } + + static double CalculateLCSSimilarity(string str1, string str2) + { + if (string.IsNullOrEmpty(str1) || string.IsNullOrEmpty(str2)) + return 0.0; + + int lcsLength = CalculateLCSLength(str1, str2); + int maxLength = Math.Max(str1.Length, str2.Length); + + if (maxLength == 0) + return 1.0; // Both strings are empty + + return (double)lcsLength / maxLength; + } + + static int CalculateLCSLength(string str1, string str2) + { + int[,] dp = new int[str1.Length + 1, str2.Length + 1]; + + for (int i = 1; i <= str1.Length; i++) + { + for (int j = 1; j <= str2.Length; j++) + { + if (str1[i - 1] == str2[j - 1]) + { + dp[i, j] = dp[i - 1, j - 1] + 1; + } + else + { + dp[i, j] = Math.Max(dp[i - 1, j], dp[i, j - 1]); + } + } + } + + return dp[str1.Length, str2.Length]; + } + + static int GetFileTypePriority(Song song) + { + // MP4 files get priority (lower number = higher priority) + if (song.FileType == FileType.MP4) + return 0; + else + return 1; + } + + static int GetChannelPriority(Song song) + { + // Channel priorities in order (lower number = higher priority) + string[] channelPriorities = { + "Sing King Karaoke", + "KaraFun Karaoke", + "Stingray Karaoke" + }; + + // Extract folder name from path + string folderName = ExtractFolderName(song.Path); + + // Find the priority index + for (int i = 0; i < channelPriorities.Length; i++) + { + if (folderName.IndexOf(channelPriorities[i], StringComparison.OrdinalIgnoreCase) >= 0) + { + return i; // Return the priority index (0 = highest priority) + } + } + + // If not found in priority list, give it lowest priority + return channelPriorities.Length; + } + + static string ExtractFolderName(string path) + { + if (string.IsNullOrEmpty(path)) + return string.Empty; + + try + { + // Get the directory name from the path + string directory = Path.GetDirectoryName(path); + if (string.IsNullOrEmpty(directory)) + return string.Empty; + + // Get the last folder name + return Path.GetFileName(directory); + } + catch + { + return string.Empty; + } + } + + // Index building methods for fast lookup + static Dictionary> BuildTitleIndex(List songs) + { + var index = new Dictionary>(); + + foreach (var song in songs) + { + var normalizedTitle = NormalizeString(song.Title); + var words = normalizedTitle.Split(' ', (char)StringSplitOptions.RemoveEmptyEntries); + + // Index by each word in the title + foreach (var word in words) + { + if (word.Length >= 3) // Only index words with 3+ characters + { + if (!index.ContainsKey(word)) + index[word] = new List(); + index[word].Add(song); + } + } + + // Also index by first few characters for prefix matching + for (int i = 3; i <= Math.Min(8, normalizedTitle.Length); i++) + { + var prefix = normalizedTitle.Substring(0, i); + if (!index.ContainsKey(prefix)) + index[prefix] = new List(); + index[prefix].Add(song); + } + } + + return index; + } + + static Dictionary> BuildArtistIndex(List songs) + { + var index = new Dictionary>(); + + foreach (var song in songs) + { + var normalizedArtist = NormalizeString(song.Artist); + var words = normalizedArtist.Split(' ', (char)StringSplitOptions.RemoveEmptyEntries); + + // Index by each word in the artist name + foreach (var word in words) + { + if (word.Length >= 3) // Only index words with 3+ characters + { + if (!index.ContainsKey(word)) + index[word] = new List(); + index[word].Add(song); + } + } + + // Also index by first few characters for prefix matching + for (int i = 3; i <= Math.Min(8, normalizedArtist.Length); i++) + { + var prefix = normalizedArtist.Substring(0, i); + if (!index.ContainsKey(prefix)) + index[prefix] = new List(); + index[prefix].Add(song); + } + } + + return index; + } + + static List GetTitleCandidates(string searchTitle, Dictionary> titleIndex) + { + var candidates = new HashSet(); + var normalizedSearch = NormalizeString(searchTitle); + var searchWords = normalizedSearch.Split(' ', (char)StringSplitOptions.RemoveEmptyEntries); + + // Find songs that contain any of the search words + foreach (var word in searchWords) + { + if (word.Length >= 3 && titleIndex.ContainsKey(word)) + { + foreach (var song in titleIndex[word]) + { + candidates.Add(song); + } + } + + // Also check prefixes + for (int i = 3; i <= Math.Min(8, word.Length); i++) + { + var prefix = word.Substring(0, i); + if (titleIndex.ContainsKey(prefix)) + { + foreach (var song in titleIndex[prefix]) { - song.FoundSongs.Add(item); + candidates.Add(song); } } } - Console.WriteLine("Found ({0}) Song:{1} - {2}", song.FoundSongs.Count(), song.Artist, song.Title); - } - } - - static bool DoesMatch(string primary, string toMatch) - { - if (primary.Contains(toMatch) || toMatch.Contains(primary)) { return true; } - int diff = primary.LevenshteinDistance(toMatch); - int distance = 3; - if (toMatch.Length < 6) { distance = 2; } - return diff < distance; - } - - static SongList Download(string listName, string url) - { - DateTime now = DateTime.Now; - string title = now.Year + " - " + listName; - - Console.WriteLine("Downloading " + title); - - string html = DownloadHtml(url); - - SongList list = null; - List songs = Parse(title, html); - if (songs != null) - { - list = new SongList(); - list.Title = title; - list.Songs = songs; - } - return list; - } - - static List Parse(string name, string html) - { - List songs = null; - var parser = new HtmlParser(); - var document = parser.Parse(html); - //2-? - var articles = document.QuerySelectorAll("ul.o-chart-results-list-row"); - if (articles.Count() > 0) - { - Console.WriteLine("Found " + articles.Count() + " Songs"); - songs = new List(); - } - - var i = 1; - foreach (var article in articles) - { - var title = "none"; //article.Attributes["data-title"].Value; - var artist = "none";//article.Attributes["data-artist"].Value; - var position = i; //article.Attributes["data-rank"].Value; - - var listItems = article.QuerySelectorAll("li.o-chart-results-list__item"); - foreach(var listItem in listItems) - {; - var h3 = listItem.QuerySelectorAll("h3"); - var span = listItem.QuerySelectorAll("span"); - if (h3.Length > 0 && span.Length > 0) - { - title = h3[0].InnerHtml.Trim(); - artist = span[0].InnerHtml.Trim(); - break; - } - - } - var song = new SongListSong(); - song.Artist = artist; - song.Title = title; - song.Position = Convert.ToInt32(position); - songs.Add(song); - i++; - } - Console.Write("Parsed " + songs.Count() + " Songs"); - return songs; - } - - static SongList DownloadHot100(string listName, string url) - { - DateTime now = DateTime.Now; - string title = now.Year + " - " + listName; - - Console.WriteLine("Downloading " + title); - - string html = DownloadHtml(url); - - SongList list = null; - List songs = ParseHot100(title, html); - if (songs != null) - { - list = new SongList(); - list.Title = title; - list.Songs = songs; - } - return list; - } - - static List ParseHot100(string name, string html) - { - List songs = null; - var parser = new HtmlParser(); - var document = parser.Parse(html); - //2-? - var cs = "chart-results-list=\""; - var ce = "data-icons=\"https:"; - var ics = html.IndexOf(cs) + cs.Length; - var ice = html.IndexOf(ce); - var json = html.Substring(ics, ice - ics); - json = json.Replace("\"", "").Replace(""", "\"").Replace(""quot;", "\"").Replace("&quoquot;;", "\"").Replace("&ququot;", "\""); - JArray articles = JArray.Parse(json); - - if (articles.Count() > 0) - { - Console.WriteLine("Found " + articles.Count() + " Songs"); - songs = new List(); - } - - - var i = 1; - foreach (var article in articles) - { - var title = (string)article["title"]; - var artist = (string)article["artist_name"]; - var song = new SongListSong(); - song.Artist = WebUtility.HtmlDecode(artist); - song.Title = WebUtility.HtmlDecode(title); - song.Position = Convert.ToInt32(i); - songs.Add(song); - i++; - } - Console.Write("Parsed " + songs.Count() + " Songs"); - return songs; + return candidates.ToList(); } - - - - static string DownloadHtml(string url) + + static List GetArtistCandidates(string searchArtist, Dictionary> artistIndex) { - string data = null; - ServicePointManager.SecurityProtocol = SecurityProtocolType.Ssl3 | SecurityProtocolType.Tls | SecurityProtocolType.Tls11 | SecurityProtocolType.Tls12; - ServicePointManager.ServerCertificateValidationCallback += (sender, certificate, chain, errors) => true; - ServicePointManager.ServerCertificateValidationCallback = delegate { return true; }; - HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url); - request.Method = "GET"; - HttpWebResponse response = (HttpWebResponse)request.GetResponse(); - - if (response.StatusCode == HttpStatusCode.OK) + var candidates = new HashSet(); + var normalizedSearch = NormalizeString(searchArtist); + var searchWords = normalizedSearch.Split(' ', (char)StringSplitOptions.RemoveEmptyEntries); + + // Find songs that contain any of the search words + foreach (var word in searchWords) { - Stream receiveStream = response.GetResponseStream(); - StreamReader readStream = null; - if (response.CharacterSet == null) + if (word.Length >= 3 && artistIndex.ContainsKey(word)) { - readStream = new StreamReader(receiveStream); + foreach (var song in artistIndex[word]) + { + candidates.Add(song); + } } - else + + // Also check prefixes + for (int i = 3; i <= Math.Min(8, word.Length); i++) { - readStream = new StreamReader(receiveStream, Encoding.GetEncoding(response.CharacterSet)); + var prefix = word.Substring(0, i); + if (artistIndex.ContainsKey(prefix)) + { + foreach (var song in artistIndex[prefix]) + { + candidates.Add(song); + } + } } - data = readStream.ReadToEnd(); - - response.Close(); - readStream.Close(); } - return data; - } - } - - static class StingExtension - { - public static string RemoveCrap(this String str) - { - string regex = "(\\[.*\\])|(\".*\")|('.*')|(\\(.*\\))"; - return Regex.Replace(str, regex, "").ToLower().Replace("ft.", "").Replace("feat.", "").Replace("featured", "").Replace("featuring", "").Replace("'", "").Replace(" "," ").Trim(); + + return candidates.ToList(); } } }