Signed-off-by: mbrucedogs <mbrucedogs@gmail.com>

This commit is contained in:
mbrucedogs 2025-08-04 16:01:31 -05:00
parent 8bec97afc4
commit cb1e1985c3

View File

@ -24,7 +24,7 @@ namespace BillboardPlaylistUpdater
static void Main(string[] args) static void Main(string[] args)
{ {
//args = new string[] { "mbrucedogstest" }; //args = new string[] { "mbrucedogs" };
if (args.Length != 1) if (args.Length != 1)
{ {
Console.WriteLine("usage: songcrawler partyid songspath"); Console.WriteLine("usage: songcrawler partyid songspath");
@ -49,279 +49,386 @@ namespace BillboardPlaylistUpdater
else else
songList = new List<SongList>(); songList = new List<SongList>();
//RunTest(); ////RunTest();
//update Shared Charts and save
UpdateCurrentCharts();
client.Set(firepath, songList);
//update Controller Charts for Local Search and save //// TEST MODE: Only process first list and don't save back to Firebase
//Console.WriteLine("TEST MODE: Processing only first list...");
//if (songList != null && songList.Count > 0)
//{
// var firstList = songList[0];
// Console.WriteLine("********************************************************");
// Console.WriteLine(string.Format("Matching Controllers Songs for {0}", firstList.Title));
// Console.WriteLine("********************************************************");
// Search(firstList);
// // Show results summary
// Console.WriteLine("\n=== TEST RESULTS SUMMARY ===");
// int totalMatches = 0;
// foreach (var song in firstList.Songs)
// {
// Console.WriteLine($"{song.Title} - {song.Artist}: {song.FoundSongs.Count} matches");
// totalMatches += song.FoundSongs.Count;
// }
// Console.WriteLine($"Total matches across all songs: {totalMatches}");
//}
//else
//{
// Console.WriteLine("No song lists found to test.");
//}
// Commented out Firebase saves for testing
client.Set(firepath, songList);
client.Set(string.Format("controllers/{0}/songList", controller), songList); client.Set(string.Format("controllers/{0}/songList", controller), songList);
UpdateSearchLists(); UpdateSearchLists();
client.Set(string.Format("controllers/{0}/songList", controller), songList); client.Set(string.Format("controllers/{0}/songList", controller), songList);
} }
/// <summary>
/// Ad-hoc diagnostic helper: runs the artist/title matcher against every
/// library song whose raw title contains a fixed sample title. The match
/// results are computed but not stored or printed, so this is only useful
/// under a debugger.
/// </summary>
static void RunTest()
{
    const string testTitle = "Heavy";
    string testArtist = "Linkin Park Featuring Kiiara".RemoveCrap().ToLower();

    foreach (var candidate in songs)
    {
        // Case-sensitive substring filter on the raw (un-normalized) title.
        if (!candidate.Title.Contains(testTitle))
        {
            continue;
        }

        string candidateArtist = candidate.Artist.RemoveCrap();
        string candidateTitle = candidate.Title.RemoveCrap();

        // Evaluated for inspection only; nothing consumes these values.
        bool artistMatches = DoesMatch(candidateArtist, testArtist);
        bool titleMatches = DoesMatch(candidateTitle, testTitle);
    }
}
/// <summary>
/// Downloads the current Billboard charts and merges them into the shared
/// <c>songList</c>: any existing list whose title matches a freshly
/// downloaded chart (case-insensitively) is replaced by the new one, and the
/// result is re-sorted by title in descending order.
/// </summary>
/// <remarks>
/// <c>Download</c> returns null when the chart page yields no parsable rows.
/// Such charts are skipped explicitly, so the previously stored list for that
/// chart is kept instead of being dropped.
/// </remarks>
static void UpdateCurrentCharts()
{
    SongList hot100 = Download("Hot 100", "https://www.billboard.com/charts/hot-100");
    SongList pop = Download("Pop-Songs", "https://www.billboard.com/charts/pop-songs");
    SongList rock = Download("Rock-Songs", "https://www.billboard.com/charts/rock-songs");
    SongList country = Download("Country-Songs", "https://www.billboard.com/charts/country-songs");
    SongList hiphop = Download("R-B-Hip-Hop-Songs", "https://www.billboard.com/charts/r-b-hip-hop-songs");

    // Merge order is kept as before: genre charts first, Hot 100 last.
    var downloadedCharts = new List<SongList> { pop, rock, country, hiphop, hot100 };

    foreach (SongList chart in downloadedCharts)
    {
        if (chart == null)
        {
            // Previously this case surfaced as a caught NullReferenceException.
            Console.WriteLine("Skipping a chart that could not be downloaded or parsed.");
            continue;
        }

        Console.WriteLine(string.Format("Checking for {0}", chart.Title));

        // Drop every stale copy of this chart before adding the fresh one.
        songList.RemoveAll(existing =>
            existing != null &&
            string.Equals(existing.Title, chart.Title, StringComparison.OrdinalIgnoreCase));
        songList.Add(chart);
    }

    songList = songList.OrderByDescending(l => l.Title).ToList();
}
static void UpdateSearchLists() static void UpdateSearchLists()
{ {
//update the controller SongLists //update the controller SongLists using parallel processing
foreach (var list in songList) Console.WriteLine($"Processing {songList.Count} lists in parallel...");
var options = new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount };
Parallel.ForEach(songList, options, list =>
{ {
Console.WriteLine("********************************************************"); Console.WriteLine($"********************************************************");
Console.WriteLine(string.Format("Matching Controllers Songs for {0}", list.Title)); Console.WriteLine($"Matching Controllers Songs for {list.Title}");
Console.WriteLine("********************************************************"); Console.WriteLine($"********************************************************");
Search(list); Search(list);
} });
} }
static void Search(SongList list) static void Search(SongList list)
{ {
foreach (var song in list.Songs) if (list.Songs == null || songs == null)
{ return;
song.FoundSongs.Clear();
var bA = song.Artist.RemoveCrap().ToLower();
var bT = song.Title.RemoveCrap().ToLower();
foreach (var item in songs) // Pre-filter disabled songs once
{ var availableSongs = songs.Where(s => !s.Disabled).ToList();
if (item.Artist != null && item.Title != null) Console.WriteLine($"Searching through {availableSongs.Count} available songs for {list.Songs.Count} songs in list...");
{
var t = item.Title.RemoveCrap().ToLower();
var a = item.Artist.RemoveCrap().ToLower();
bool titleMatch = DoesMatch(bT, t);
if (titleMatch && DoesMatch(bA, a))
{
song.FoundSongs.Add(item);
}
}
}
Console.WriteLine("Found ({0}) Song:{1} - {2}", song.FoundSongs.Count(), song.Artist, song.Title);
// Build fast lookup indexes
var titleIndex = BuildTitleIndex(availableSongs);
var artistIndex = BuildArtistIndex(availableSongs);
// Process each song in the list sequentially
foreach (var songListItem in list.Songs)
{
Console.WriteLine($"Searching for: {songListItem.Title} - {songListItem.Artist}");
var songMatches = new List<(Song song, double score)>();
// Get candidate matches using indexes
var titleCandidates = GetTitleCandidates(songListItem.Title, titleIndex);
var artistCandidates = GetArtistCandidates(songListItem.Artist, artistIndex);
// Combine and deduplicate candidates
var allCandidates = titleCandidates.Union(artistCandidates).Distinct().ToList();
Console.WriteLine($" Found {allCandidates.Count} candidates to evaluate...");
// Evaluate only the candidates (much faster than checking all songs)
foreach (var song in allCandidates)
{
// Calculate similarity scores for title and artist
double titleSimilarity = CalculateSimilarity(songListItem.Title, song.Title);
double artistSimilarity = CalculateSimilarity(songListItem.Artist, song.Artist);
// Combined score (weighted average - title is more important)
double combinedScore = (titleSimilarity * 0.7) + (artistSimilarity * 0.3);
// If combined score is above threshold, consider it a match
if (combinedScore >= 0.85) // Adjustable threshold
{
songMatches.Add((song, combinedScore));
} }
} }
static bool DoesMatch(string primary, string toMatch) // Sort matches by relevance and file type priority
{ songListItem.FoundSongs = songMatches
if (primary.Contains(toMatch) || toMatch.Contains(primary)) { return true; } .OrderByDescending(x => x.score)
int diff = primary.LevenshteinDistance(toMatch); .ThenBy(x => GetFileTypePriority(x.song))
int distance = 3; .ThenBy(x => GetChannelPriority(x.song))
if (toMatch.Length < 6) { distance = 2; } .Take(10) // Limit to top 10 matches
return diff < distance; .Select(x => x.song)
.ToList();
Console.WriteLine($" Total matches found: {songListItem.FoundSongs.Count}");
}
} }
static SongList Download(string listName, string url) static double CalculateSimilarity(string str1, string str2)
{ {
DateTime now = DateTime.Now; if (string.IsNullOrEmpty(str1) || string.IsNullOrEmpty(str2))
string title = now.Year + " - " + listName; return 0.0;
Console.WriteLine("Downloading " + title); // Normalize strings for comparison
str1 = NormalizeString(str1);
str2 = NormalizeString(str2);
string html = DownloadHtml(url); // Use multiple fuzzy string algorithms and combine results
double diceCoeff = str1.DiceCoefficient(str2);
double levenshtein = 1.0 - (str1.LevenshteinDistance(str2) / (double)Math.Max(str1.Length, str2.Length));
SongList list = null; // Use Longest Common Subsequence as a third algorithm
List<SongListSong> songs = Parse(title, html); double lcsScore = CalculateLCSSimilarity(str1, str2);
if (songs != null)
{ // Weighted average of different algorithms
list = new SongList(); return (diceCoeff * 0.4) + (levenshtein * 0.3) + (lcsScore * 0.3);
list.Title = title;
list.Songs = songs;
}
return list;
} }
static List<SongListSong> Parse(string name, string html) static double CalculateCombinedSimilarity(SongListSong songListItem, Song song)
{ {
List<SongListSong> songs = null; double titleSimilarity = CalculateSimilarity(songListItem.Title, song.Title);
var parser = new HtmlParser(); double artistSimilarity = CalculateSimilarity(songListItem.Artist, song.Artist);
var document = parser.Parse(html);
//2-? // Weight title more heavily than artist
var articles = document.QuerySelectorAll("ul.o-chart-results-list-row"); return (titleSimilarity * 0.7) + (artistSimilarity * 0.3);
if (articles.Count() > 0)
{
Console.WriteLine("Found " + articles.Count() + " Songs");
songs = new List<SongListSong>();
} }
var i = 1; static string NormalizeString(string input)
foreach (var article in articles)
{ {
var title = "none"; //article.Attributes["data-title"].Value; if (string.IsNullOrEmpty(input))
var artist = "none";//article.Attributes["data-artist"].Value; return string.Empty;
var position = i; //article.Attributes["data-rank"].Value;
var listItems = article.QuerySelectorAll("li.o-chart-results-list__item"); // Remove common karaoke prefixes/suffixes
foreach(var listItem in listItems) var normalized = input.ToLowerInvariant();
{;
var h3 = listItem.QuerySelectorAll("h3"); // Remove common karaoke indicators
var span = listItem.QuerySelectorAll("span"); normalized = Regex.Replace(normalized, @"\b(karaoke|karaoke version|instrumental|backing track)\b", "", RegexOptions.IgnoreCase);
if (h3.Length > 0 && span.Length > 0)
{ // Remove extra whitespace and punctuation
title = h3[0].InnerHtml.Trim(); normalized = Regex.Replace(normalized, @"\s+", " ");
artist = span[0].InnerHtml.Trim(); normalized = Regex.Replace(normalized, @"[^\w\s]", "");
break;
return normalized.Trim();
} }
} static double CalculateLCSSimilarity(string str1, string str2)
var song = new SongListSong(); {
song.Artist = artist; if (string.IsNullOrEmpty(str1) || string.IsNullOrEmpty(str2))
song.Title = title; return 0.0;
song.Position = Convert.ToInt32(position);
songs.Add(song); int lcsLength = CalculateLCSLength(str1, str2);
i++; int maxLength = Math.Max(str1.Length, str2.Length);
}
Console.Write("Parsed " + songs.Count() + " Songs"); if (maxLength == 0)
return songs; return 1.0; // Both strings are empty
return (double)lcsLength / maxLength;
} }
static SongList DownloadHot100(string listName, string url) static int CalculateLCSLength(string str1, string str2)
{ {
DateTime now = DateTime.Now; int[,] dp = new int[str1.Length + 1, str2.Length + 1];
string title = now.Year + " - " + listName;
Console.WriteLine("Downloading " + title); for (int i = 1; i <= str1.Length; i++)
string html = DownloadHtml(url);
SongList list = null;
List<SongListSong> songs = ParseHot100(title, html);
if (songs != null)
{ {
list = new SongList(); for (int j = 1; j <= str2.Length; j++)
list.Title = title;
list.Songs = songs;
}
return list;
}
static List<SongListSong> ParseHot100(string name, string html)
{ {
List<SongListSong> songs = null; if (str1[i - 1] == str2[j - 1])
var parser = new HtmlParser();
var document = parser.Parse(html);
//2-?
var cs = "chart-results-list=\"";
var ce = "data-icons=\"https:";
var ics = html.IndexOf(cs) + cs.Length;
var ice = html.IndexOf(ce);
var json = html.Substring(ics, ice - ics);
json = json.Replace("\"", "").Replace("&quot;", "\"").Replace("&quotquot;", "\"").Replace("&quoquot;;", "\"").Replace("&ququot;", "\"");
JArray articles = JArray.Parse(json);
if (articles.Count() > 0)
{ {
Console.WriteLine("Found " + articles.Count() + " Songs"); dp[i, j] = dp[i - 1, j - 1] + 1;
songs = new List<SongListSong>();
}
var i = 1;
foreach (var article in articles)
{
var title = (string)article["title"];
var artist = (string)article["artist_name"];
var song = new SongListSong();
song.Artist = WebUtility.HtmlDecode(artist);
song.Title = WebUtility.HtmlDecode(title);
song.Position = Convert.ToInt32(i);
songs.Add(song);
i++;
}
Console.Write("Parsed " + songs.Count() + " Songs");
return songs;
}
static string DownloadHtml(string url)
{
string data = null;
ServicePointManager.SecurityProtocol = SecurityProtocolType.Ssl3 | SecurityProtocolType.Tls | SecurityProtocolType.Tls11 | SecurityProtocolType.Tls12;
ServicePointManager.ServerCertificateValidationCallback += (sender, certificate, chain, errors) => true;
ServicePointManager.ServerCertificateValidationCallback = delegate { return true; };
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
request.Method = "GET";
HttpWebResponse response = (HttpWebResponse)request.GetResponse();
if (response.StatusCode == HttpStatusCode.OK)
{
Stream receiveStream = response.GetResponseStream();
StreamReader readStream = null;
if (response.CharacterSet == null)
{
readStream = new StreamReader(receiveStream);
} }
else else
{ {
readStream = new StreamReader(receiveStream, Encoding.GetEncoding(response.CharacterSet)); dp[i, j] = Math.Max(dp[i - 1, j], dp[i, j - 1]);
} }
data = readStream.ReadToEnd();
response.Close();
readStream.Close();
}
return data;
} }
} }
static class StingExtension return dp[str1.Length, str2.Length];
}
static int GetFileTypePriority(Song song)
{ {
public static string RemoveCrap(this String str) // MP4 files get priority (lower number = higher priority)
if (song.FileType == FileType.MP4)
return 0;
else
return 1;
}
static int GetChannelPriority(Song song)
{ {
string regex = "(\\[.*\\])|(\".*\")|('.*')|(\\(.*\\))"; // Channel priorities in order (lower number = higher priority)
return Regex.Replace(str, regex, "").ToLower().Replace("ft.", "").Replace("feat.", "").Replace("featured", "").Replace("featuring", "").Replace("'", "").Replace(" "," ").Trim(); string[] channelPriorities = {
"Sing King Karaoke",
"KaraFun Karaoke",
"Stingray Karaoke"
};
// Extract folder name from path
string folderName = ExtractFolderName(song.Path);
// Find the priority index
for (int i = 0; i < channelPriorities.Length; i++)
{
if (folderName.IndexOf(channelPriorities[i], StringComparison.OrdinalIgnoreCase) >= 0)
{
return i; // Return the priority index (0 = highest priority)
}
}
// If not found in priority list, give it lowest priority
return channelPriorities.Length;
}
/// <summary>
/// Returns the name of the folder that directly contains the file at
/// <paramref name="path"/> (the last segment of its directory part).
/// </summary>
/// <param name="path">File path; may be null or empty.</param>
/// <returns>
/// The immediate parent folder name, or <see cref="string.Empty"/> when the
/// path is null/empty, has no directory part, or is malformed.
/// </returns>
static string ExtractFolderName(string path)
{
    if (string.IsNullOrEmpty(path))
    {
        return string.Empty;
    }

    try
    {
        // Directory part of the path, e.g. "root/Channel" for "root/Channel/song.mp4".
        string directory = Path.GetDirectoryName(path);

        // GetFileName on a directory path yields its last segment.
        return string.IsNullOrEmpty(directory) ? string.Empty : Path.GetFileName(directory);
    }
    catch (Exception ex) when (ex is ArgumentException
                               || ex is PathTooLongException
                               || ex is NotSupportedException)
    {
        // Only malformed-path failures fall back to "no folder"; anything
        // else is unexpected and is allowed to propagate.
        return string.Empty;
    }
}
// Index building methods for fast lookup
/// <summary>
/// Builds a lookup from title fragments to the songs whose normalized title
/// contains them. Each song is indexed under every title word of three or
/// more characters, and under the first 3 to 8 characters of the whole
/// normalized title.
/// </summary>
/// <param name="songs">Songs to index; titles are normalized via <c>NormalizeString</c>.</param>
/// <returns>Dictionary keyed by word or title prefix; each song appears at most once per key.</returns>
static Dictionary<string, List<Song>> BuildTitleIndex(List<Song> songs)
{
    var index = new Dictionary<string, List<Song>>();

    foreach (var song in songs)
    {
        var normalizedTitle = NormalizeString(song.Title);

        // Files the current song under the given key. A word and a title
        // prefix can be the same string; since one song's entries are added
        // back-to-back, checking the bucket's tail avoids the duplicate.
        void AddToIndex(string key)
        {
            if (!index.TryGetValue(key, out var bucket))
            {
                bucket = new List<Song>();
                index[key] = bucket;
            }

            if (bucket.Count == 0 || !ReferenceEquals(bucket[bucket.Count - 1], song))
            {
                bucket.Add(song);
            }
        }

        // The options value must go through the (char[], StringSplitOptions)
        // overload; casting it to char just adds '\u0001' as a separator.
        var words = normalizedTitle.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

        // Index by each word in the title (3+ characters only).
        foreach (var word in words)
        {
            if (word.Length >= 3)
            {
                AddToIndex(word);
            }
        }

        // Also index by the first few characters of the title for prefix matching.
        int longestPrefix = Math.Min(8, normalizedTitle.Length);
        for (int length = 3; length <= longestPrefix; length++)
        {
            AddToIndex(normalizedTitle.Substring(0, length));
        }
    }

    return index;
}
/// <summary>
/// Builds a lookup from artist-name fragments to the songs whose normalized
/// artist contains them. Each song is indexed under every artist word of
/// three or more characters, and under the first 3 to 8 characters of the
/// whole normalized artist name.
/// </summary>
/// <param name="songs">Songs to index; artists are normalized via <c>NormalizeString</c>.</param>
/// <returns>Dictionary keyed by word or artist prefix; each song appears at most once per key.</returns>
static Dictionary<string, List<Song>> BuildArtistIndex(List<Song> songs)
{
    var index = new Dictionary<string, List<Song>>();

    foreach (var song in songs)
    {
        var normalizedArtist = NormalizeString(song.Artist);

        // Files the current song under the given key. A word and a name
        // prefix can be the same string; since one song's entries are added
        // back-to-back, checking the bucket's tail avoids the duplicate.
        void AddToIndex(string key)
        {
            if (!index.TryGetValue(key, out var bucket))
            {
                bucket = new List<Song>();
                index[key] = bucket;
            }

            if (bucket.Count == 0 || !ReferenceEquals(bucket[bucket.Count - 1], song))
            {
                bucket.Add(song);
            }
        }

        // The options value must go through the (char[], StringSplitOptions)
        // overload; casting it to char just adds '\u0001' as a separator.
        var words = normalizedArtist.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

        // Index by each word in the artist name (3+ characters only).
        foreach (var word in words)
        {
            if (word.Length >= 3)
            {
                AddToIndex(word);
            }
        }

        // Also index by the first few characters of the name for prefix matching.
        int longestPrefix = Math.Min(8, normalizedArtist.Length);
        for (int length = 3; length <= longestPrefix; length++)
        {
            AddToIndex(normalizedArtist.Substring(0, length));
        }
    }

    return index;
}
/// <summary>
/// Collects the songs that share at least one word, or one 3-to-8 character
/// word prefix, with the searched title according to the title index.
/// </summary>
/// <param name="searchTitle">Title to look up; normalized via <c>NormalizeString</c> before splitting into words.</param>
/// <param name="titleIndex">Index mapping words/prefixes to songs.</param>
/// <returns>Distinct candidate songs; empty when nothing in the index matches.</returns>
static List<Song> GetTitleCandidates(string searchTitle, Dictionary<string, List<Song>> titleIndex)
{
    var candidates = new HashSet<Song>();

    // The options value must go through the (char[], StringSplitOptions)
    // overload; casting it to char just adds '\u0001' as a separator.
    var searchWords = NormalizeString(searchTitle)
        .Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

    foreach (var word in searchWords)
    {
        // Words shorter than three characters have neither a word key nor a prefix key.
        if (word.Length < 3)
        {
            continue;
        }

        // Songs indexed under the exact word.
        if (titleIndex.TryGetValue(word, out var wordMatches))
        {
            candidates.UnionWith(wordMatches);
        }

        // Songs indexed under each 3..8 character prefix of the word.
        int longestPrefix = Math.Min(8, word.Length);
        for (int length = 3; length <= longestPrefix; length++)
        {
            if (titleIndex.TryGetValue(word.Substring(0, length), out var prefixMatches))
            {
                candidates.UnionWith(prefixMatches);
            }
        }
    }

    return candidates.ToList();
}
/// <summary>
/// Collects the songs that share at least one word, or one 3-to-8 character
/// word prefix, with the searched artist according to the artist index.
/// </summary>
/// <param name="searchArtist">Artist to look up; normalized via <c>NormalizeString</c> before splitting into words.</param>
/// <param name="artistIndex">Index mapping words/prefixes to songs.</param>
/// <returns>Distinct candidate songs; empty when nothing in the index matches.</returns>
static List<Song> GetArtistCandidates(string searchArtist, Dictionary<string, List<Song>> artistIndex)
{
    var candidates = new HashSet<Song>();

    // The options value must go through the (char[], StringSplitOptions)
    // overload; casting it to char just adds '\u0001' as a separator.
    var searchWords = NormalizeString(searchArtist)
        .Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

    foreach (var word in searchWords)
    {
        // Words shorter than three characters have neither a word key nor a prefix key.
        if (word.Length < 3)
        {
            continue;
        }

        // Songs indexed under the exact word.
        if (artistIndex.TryGetValue(word, out var wordMatches))
        {
            candidates.UnionWith(wordMatches);
        }

        // Songs indexed under each 3..8 character prefix of the word.
        int longestPrefix = Math.Min(8, word.Length);
        for (int length = 3; length <= longestPrefix; length++)
        {
            if (artistIndex.TryGetValue(word.Substring(0, length), out var prefixMatches))
            {
                candidates.UnionWith(prefixMatches);
            }
        }
    }

    return candidates.ToList();
}
} }
} }