Signed-off-by: mbrucedogs <mbrucedogs@gmail.com>

This commit is contained in:
mbrucedogs 2025-08-04 16:01:31 -05:00
parent 8bec97afc4
commit cb1e1985c3

View File

@ -24,7 +24,7 @@ namespace BillboardPlaylistUpdater
static void Main(string[] args) static void Main(string[] args)
{ {
//args = new string[] { "mbrucedogstest" }; //args = new string[] { "mbrucedogs" };
if (args.Length != 1) if (args.Length != 1)
{ {
Console.WriteLine("usage: songcrawler partyid songspath"); Console.WriteLine("usage: songcrawler partyid songspath");
@ -49,279 +49,386 @@ namespace BillboardPlaylistUpdater
else else
songList = new List<SongList>(); songList = new List<SongList>();
//RunTest(); ////RunTest();
//update Shared Charts and save
UpdateCurrentCharts();
client.Set(firepath, songList);
//update Controller Charts for Local Search and save //// TEST MODE: Only process first list and don't save back to Firebase
//Console.WriteLine("TEST MODE: Processing only first list...");
//if (songList != null && songList.Count > 0)
//{
// var firstList = songList[0];
// Console.WriteLine("********************************************************");
// Console.WriteLine(string.Format("Matching Controllers Songs for {0}", firstList.Title));
// Console.WriteLine("********************************************************");
// Search(firstList);
// // Show results summary
// Console.WriteLine("\n=== TEST RESULTS SUMMARY ===");
// int totalMatches = 0;
// foreach (var song in firstList.Songs)
// {
// Console.WriteLine($"{song.Title} - {song.Artist}: {song.FoundSongs.Count} matches");
// totalMatches += song.FoundSongs.Count;
// }
// Console.WriteLine($"Total matches across all songs: {totalMatches}");
//}
//else
//{
// Console.WriteLine("No song lists found to test.");
//}
// Commented out Firebase saves for testing
client.Set(firepath, songList);
client.Set(string.Format("controllers/{0}/songList", controller), songList); client.Set(string.Format("controllers/{0}/songList", controller), songList);
UpdateSearchLists(); UpdateSearchLists();
client.Set(string.Format("controllers/{0}/songList", controller), songList); client.Set(string.Format("controllers/{0}/songList", controller), songList);
} }
/// <summary>
/// Ad-hoc matcher check: runs <c>DoesMatch</c> for artist and title against every
/// catalog song whose title contains a hard-coded sample title. The results are
/// not stored or printed (presumably inspected in a debugger).
/// </summary>
static void RunTest()
{
    var testArtist = "Linkin Park Featuring Kiiara".RemoveCrap().ToLower();
    var testTitle = "Heavy";

    // Narrow the catalog to songs whose raw title contains the sample title.
    var candidates = songs.Where(s => s.Title.Contains(testTitle)).ToList();

    for (int index = 0; index < candidates.Count; index++)
    {
        var candidate = candidates[index];
        var cleanedArtist = candidate.Artist.RemoveCrap();
        var cleanedTitle = candidate.Title.RemoveCrap();

        // Outcomes are intentionally unused.
        var artistMatches = DoesMatch(cleanedArtist, testArtist);
        var titleMatches = DoesMatch(cleanedTitle, testTitle);
    }
}
/// <summary>
/// Downloads the current Billboard charts and merges them into the shared
/// <c>songList</c>: any existing list with the same title (case-insensitive) is
/// replaced by the fresh download, then the lists are re-sorted by title,
/// descending.
/// </summary>
/// <remarks>
/// <c>Download</c> returns null when a chart could not be parsed; such charts are
/// skipped explicitly so the previously stored list for that chart is kept.
/// </remarks>
static void UpdateCurrentCharts()
{
    // Download order is kept as-is so console output stays in the same sequence.
    SongList hot100 = Download("Hot 100", "https://www.billboard.com/charts/hot-100");
    SongList pop = Download("Pop-Songs", "https://www.billboard.com/charts/pop-songs");
    SongList rock = Download("Rock-Songs", "https://www.billboard.com/charts/rock-songs");
    SongList country = Download("Country-Songs", "https://www.billboard.com/charts/country-songs");
    SongList hiphop = Download("R-B-Hip-Hop-Songs", "https://www.billboard.com/charts/r-b-hip-hop-songs");

    var freshCharts = new List<SongList> { pop, rock, country, hiphop, hot100 };

    foreach (SongList chart in freshCharts)
    {
        if (chart == null)
        {
            // A failed download must not wipe out the list we already have.
            Console.WriteLine("Skipping a chart that could not be downloaded or parsed");
            continue;
        }

        Console.WriteLine(string.Format("Checking for {0}", chart.Title));

        // Drop every stale copy of this chart; null-safe on both the entry and its title.
        songList.RemoveAll(existing =>
            existing != null &&
            string.Equals(existing.Title, chart.Title, StringComparison.OrdinalIgnoreCase));

        songList.Add(chart);
    }

    songList = songList.OrderByDescending(l => l.Title).ToList();
}
static void UpdateSearchLists() static void UpdateSearchLists()
{ {
//update the controller SongLists //update the controller SongLists using parallel processing
foreach (var list in songList) Console.WriteLine($"Processing {songList.Count} lists in parallel...");
var options = new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount };
Parallel.ForEach(songList, options, list =>
{ {
Console.WriteLine("********************************************************"); Console.WriteLine($"********************************************************");
Console.WriteLine(string.Format("Matching Controllers Songs for {0}", list.Title)); Console.WriteLine($"Matching Controllers Songs for {list.Title}");
Console.WriteLine("********************************************************"); Console.WriteLine($"********************************************************");
Search(list); Search(list);
} });
} }
static void Search(SongList list) static void Search(SongList list)
{ {
foreach (var song in list.Songs) if (list.Songs == null || songs == null)
{ return;
song.FoundSongs.Clear();
var bA = song.Artist.RemoveCrap().ToLower();
var bT = song.Title.RemoveCrap().ToLower();
foreach (var item in songs) // Pre-filter disabled songs once
var availableSongs = songs.Where(s => !s.Disabled).ToList();
Console.WriteLine($"Searching through {availableSongs.Count} available songs for {list.Songs.Count} songs in list...");
// Build fast lookup indexes
var titleIndex = BuildTitleIndex(availableSongs);
var artistIndex = BuildArtistIndex(availableSongs);
// Process each song in the list sequentially
foreach (var songListItem in list.Songs)
{
Console.WriteLine($"Searching for: {songListItem.Title} - {songListItem.Artist}");
var songMatches = new List<(Song song, double score)>();
// Get candidate matches using indexes
var titleCandidates = GetTitleCandidates(songListItem.Title, titleIndex);
var artistCandidates = GetArtistCandidates(songListItem.Artist, artistIndex);
// Combine and deduplicate candidates
var allCandidates = titleCandidates.Union(artistCandidates).Distinct().ToList();
Console.WriteLine($" Found {allCandidates.Count} candidates to evaluate...");
// Evaluate only the candidates (much faster than checking all songs)
foreach (var song in allCandidates)
{ {
if (item.Artist != null && item.Title != null) // Calculate similarity scores for title and artist
double titleSimilarity = CalculateSimilarity(songListItem.Title, song.Title);
double artistSimilarity = CalculateSimilarity(songListItem.Artist, song.Artist);
// Combined score (weighted average - title is more important)
double combinedScore = (titleSimilarity * 0.7) + (artistSimilarity * 0.3);
// If combined score is above threshold, consider it a match
if (combinedScore >= 0.85) // Adjustable threshold
{ {
var t = item.Title.RemoveCrap().ToLower(); songMatches.Add((song, combinedScore));
var a = item.Artist.RemoveCrap().ToLower(); }
bool titleMatch = DoesMatch(bT, t); }
if (titleMatch && DoesMatch(bA, a))
// Sort matches by relevance and file type priority
songListItem.FoundSongs = songMatches
.OrderByDescending(x => x.score)
.ThenBy(x => GetFileTypePriority(x.song))
.ThenBy(x => GetChannelPriority(x.song))
.Take(10) // Limit to top 10 matches
.Select(x => x.song)
.ToList();
Console.WriteLine($" Total matches found: {songListItem.FoundSongs.Count}");
}
}
/// <summary>
/// Scores how alike two strings are by blending three measures computed on their
/// normalized forms: Dice coefficient (weight 0.4), a Levenshtein-based ratio
/// (weight 0.3) and a longest-common-subsequence ratio (weight 0.3).
/// </summary>
/// <param name="str1">First string; null or empty yields 0.</param>
/// <param name="str2">Second string; null or empty yields 0.</param>
/// <returns>
/// The weighted score, or 0.0 when either input is null/empty or becomes empty
/// after normalization.
/// </returns>
static double CalculateSimilarity(string str1, string str2)
{
    if (string.IsNullOrEmpty(str1) || string.IsNullOrEmpty(str2))
        return 0.0;

    // Normalize strings for comparison
    str1 = NormalizeString(str1);
    str2 = NormalizeString(str2);

    // Normalization can strip an input down to nothing (e.g. "Karaoke" or "!!!").
    // Without this guard the Levenshtein ratio below divides by a max length of 0.
    if (str1.Length == 0 || str2.Length == 0)
        return 0.0;

    // DiceCoefficient / LevenshteinDistance are string extension methods defined
    // outside this block (presumably a fuzzy-string library - confirm).
    double diceCoeff = str1.DiceCoefficient(str2);
    double levenshtein = 1.0 - (str1.LevenshteinDistance(str2) / (double)Math.Max(str1.Length, str2.Length));

    // Use Longest Common Subsequence as a third algorithm
    double lcsScore = CalculateLCSSimilarity(str1, str2);

    // Weighted average of different algorithms
    return (diceCoeff * 0.4) + (levenshtein * 0.3) + (lcsScore * 0.3);
}
/// <summary>
/// Combines title and artist similarity for a chart entry and a catalog song
/// into one score, weighting the title at 0.7 and the artist at 0.3.
/// </summary>
/// <param name="songListItem">Chart entry supplying the wanted title and artist.</param>
/// <param name="song">Catalog song being compared against the chart entry.</param>
/// <returns>The weighted sum of the two <c>CalculateSimilarity</c> scores.</returns>
static double CalculateCombinedSimilarity(SongListSong songListItem, Song song)
{
    double titleScore = CalculateSimilarity(songListItem.Title, song.Title);
    double artistScore = CalculateSimilarity(songListItem.Artist, song.Artist);

    double weightedTitle = titleScore * 0.7;
    double weightedArtist = artistScore * 0.3;
    return weightedTitle + weightedArtist;
}
/// <summary>
/// Produces a comparison-friendly form of a title or artist: lower-cased, with
/// karaoke marker phrases and punctuation removed and whitespace collapsed.
/// </summary>
/// <param name="input">Raw text; may be null.</param>
/// <returns>The normalized text, or an empty string for null/empty input.</returns>
static string NormalizeString(string input)
{
    if (string.IsNullOrEmpty(input))
        return string.Empty;

    var normalized = input.ToLowerInvariant();

    // Longer phrase first: regex alternation takes the first alternative that
    // matches, so "karaoke" listed before "karaoke version" would leave "version" behind.
    normalized = Regex.Replace(normalized, @"\b(karaoke version|karaoke|instrumental|backing track)\b", "");

    // Strip punctuation BEFORE collapsing whitespace; removing "-" or "()" between
    // two spaces would otherwise leave a double space in the result.
    normalized = Regex.Replace(normalized, @"[^\w\s]", "");
    normalized = Regex.Replace(normalized, @"\s+", " ");

    return normalized.Trim();
}
/// <summary>
/// Expresses the longest common subsequence of two strings as a fraction of the
/// longer string's length.
/// </summary>
/// <param name="str1">First string.</param>
/// <param name="str2">Second string.</param>
/// <returns>
/// A value in [0, 1]; 0.0 when either string is null or empty.
/// </returns>
static double CalculateLCSSimilarity(string str1, string str2)
{
    if (string.IsNullOrEmpty(str1) || string.IsNullOrEmpty(str2))
        return 0.0;

    // Both strings are non-empty past the guard, so the divisor is at least 1;
    // the former "both empty" branch could never run and has been removed.
    int maxLength = Math.Max(str1.Length, str2.Length);
    int lcsLength = CalculateLCSLength(str1, str2);

    return (double)lcsLength / maxLength;
}
/// <summary>
/// Computes the length of the longest common subsequence of two strings with the
/// classic dynamic-programming recurrence, keeping only the previous and current
/// table rows.
/// </summary>
/// <param name="str1">First string (must not be null).</param>
/// <param name="str2">Second string (must not be null).</param>
/// <returns>The number of characters in the longest common subsequence.</returns>
static int CalculateLCSLength(string str1, string str2)
{
    int rowCount = str1.Length;
    int columnCount = str2.Length;

    // Index 0 of each row is the "empty prefix" column and always stays 0.
    int[] previousRow = new int[columnCount + 1];
    int[] currentRow = new int[columnCount + 1];

    for (int row = 0; row < rowCount; row++)
    {
        char left = str1[row];
        for (int column = 0; column < columnCount; column++)
        {
            currentRow[column + 1] = left == str2[column]
                ? previousRow[column] + 1
                : Math.Max(previousRow[column + 1], currentRow[column]);
        }

        // The row just filled becomes "previous" for the next character of str1.
        int[] recycled = previousRow;
        previousRow = currentRow;
        currentRow = recycled;
    }

    return previousRow[columnCount];
}
/// <summary>
/// Sort key that puts MP4 files ahead of every other file type.
/// </summary>
/// <param name="song">Song whose <c>FileType</c> is inspected.</param>
/// <returns>0 for MP4, 1 otherwise (lower number = higher priority).</returns>
static int GetFileTypePriority(Song song)
{
    return song.FileType == FileType.MP4 ? 0 : 1;
}
/// <summary>
/// Sort key that ranks a song by which preferred karaoke channel its containing
/// folder name mentions (case-insensitive substring match).
/// </summary>
/// <param name="song">Song whose <c>Path</c> supplies the folder name.</param>
/// <returns>
/// The index of the first preferred channel found in the folder name (0 = highest
/// priority), or the number of preferred channels when none is found.
/// </returns>
static int GetChannelPriority(Song song)
{
    // Listed from most to least preferred.
    string[] preferredChannels =
    {
        "Sing King Karaoke",
        "KaraFun Karaoke",
        "Stingray Karaoke"
    };

    string folder = ExtractFolderName(song.Path);

    int rank = Array.FindIndex(
        preferredChannels,
        channel => folder.IndexOf(channel, StringComparison.OrdinalIgnoreCase) >= 0);

    // FindIndex reports "no match" as -1; map that to the lowest priority.
    return rank >= 0 ? rank : preferredChannels.Length;
}
/// <summary>
/// Returns the name of the folder that directly contains the given file path.
/// </summary>
/// <param name="path">File path; may be null or empty.</param>
/// <returns>
/// The last segment of the path's directory, or an empty string when the path is
/// null/empty, has no directory part, or cannot be processed.
/// </returns>
static string ExtractFolderName(string path)
{
    if (!string.IsNullOrEmpty(path))
    {
        try
        {
            string parent = Path.GetDirectoryName(path);
            if (!string.IsNullOrEmpty(parent))
            {
                // GetFileName on a directory path yields its final segment.
                return Path.GetFileName(parent);
            }
        }
        catch
        {
            // Best effort: any failure from the Path helpers falls through to the empty result.
        }
    }

    return string.Empty;
}
// Index building methods for fast lookup

/// <summary>
/// Builds a lookup from title fragments to the songs containing them. Each song is
/// indexed under every word of its normalized title that has 3+ characters, and
/// under the first 3 to 8 characters of the whole normalized title.
/// </summary>
/// <param name="songs">Songs to index.</param>
/// <returns>
/// A dictionary mapping each key to its songs. A song can appear more than once
/// under a key when a word and a title prefix coincide.
/// </returns>
static Dictionary<string, List<Song>> BuildTitleIndex(List<Song> songs)
{
    var index = new Dictionary<string, List<Song>>();

    // Appends to the key's bucket, creating it on first use (single dictionary lookup).
    void AddEntry(string key, Song entry)
    {
        if (!index.TryGetValue(key, out var bucket))
        {
            bucket = new List<Song>();
            index[key] = bucket;
        }
        bucket.Add(entry);
    }

    foreach (var song in songs)
    {
        var normalizedTitle = NormalizeString(song.Title);

        // The options must go through the char[] overload; casting the enum to char
        // only added '\u0001' as a second separator and never removed empty entries.
        var words = normalizedTitle.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

        // Index by each word in the title
        foreach (var word in words)
        {
            if (word.Length >= 3) // Only index words with 3+ characters
            {
                AddEntry(word, song);
            }
        }

        // Also index by first few characters for prefix matching
        int longestPrefix = Math.Min(8, normalizedTitle.Length);
        for (int i = 3; i <= longestPrefix; i++)
        {
            AddEntry(normalizedTitle.Substring(0, i), song);
        }
    }

    return index;
}
/// <summary>
/// Builds a lookup from artist-name fragments to the songs by matching artists.
/// Each song is indexed under every word of its normalized artist name that has
/// 3+ characters, and under the first 3 to 8 characters of the whole normalized name.
/// </summary>
/// <param name="songs">Songs to index.</param>
/// <returns>
/// A dictionary mapping each key to its songs. A song can appear more than once
/// under a key when a word and a name prefix coincide.
/// </returns>
static Dictionary<string, List<Song>> BuildArtistIndex(List<Song> songs)
{
    var index = new Dictionary<string, List<Song>>();

    // Appends to the key's bucket, creating it on first use (single dictionary lookup).
    void AddEntry(string key, Song entry)
    {
        if (!index.TryGetValue(key, out var bucket))
        {
            bucket = new List<Song>();
            index[key] = bucket;
        }
        bucket.Add(entry);
    }

    foreach (var song in songs)
    {
        var normalizedArtist = NormalizeString(song.Artist);

        // The options must go through the char[] overload; casting the enum to char
        // only added '\u0001' as a second separator and never removed empty entries.
        var words = normalizedArtist.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

        // Index by each word in the artist name
        foreach (var word in words)
        {
            if (word.Length >= 3) // Only index words with 3+ characters
            {
                AddEntry(word, song);
            }
        }

        // Also index by first few characters for prefix matching
        int longestPrefix = Math.Min(8, normalizedArtist.Length);
        for (int i = 3; i <= longestPrefix; i++)
        {
            AddEntry(normalizedArtist.Substring(0, i), song);
        }
    }

    return index;
}
static List<Song> GetTitleCandidates(string searchTitle, Dictionary<string, List<Song>> titleIndex)
{
var candidates = new HashSet<Song>();
var normalizedSearch = NormalizeString(searchTitle);
var searchWords = normalizedSearch.Split(' ', (char)StringSplitOptions.RemoveEmptyEntries);
// Find songs that contain any of the search words
foreach (var word in searchWords)
{
if (word.Length >= 3 && titleIndex.ContainsKey(word))
{
foreach (var song in titleIndex[word])
{
candidates.Add(song);
}
}
// Also check prefixes
for (int i = 3; i <= Math.Min(8, word.Length); i++)
{
var prefix = word.Substring(0, i);
if (titleIndex.ContainsKey(prefix))
{
foreach (var song in titleIndex[prefix])
{ {
song.FoundSongs.Add(item); candidates.Add(song);
} }
} }
} }
Console.WriteLine("Found ({0}) Song:{1} - {2}", song.FoundSongs.Count(), song.Artist, song.Title);
} }
}
/// <summary>
/// Loose equality check for two cleaned-up strings: they match when either one
/// contains the other, or when their Levenshtein distance is below a small limit.
/// </summary>
/// <param name="primary">The reference string.</param>
/// <param name="toMatch">The string compared against it; its length selects the limit.</param>
/// <returns>
/// True on containment in either direction, or when the edit distance is below 3
/// (below 2 when <paramref name="toMatch"/> is shorter than 6 characters).
/// </returns>
static bool DoesMatch(string primary, string toMatch)
{
    bool oneContainsOther = primary.Contains(toMatch) || toMatch.Contains(primary);
    if (oneContainsOther)
    {
        return true;
    }

    // Short strings get a tighter tolerance.
    int allowedDistance = toMatch.Length < 6 ? 2 : 3;
    return primary.LevenshteinDistance(toMatch) < allowedDistance;
}
/// <summary>
/// Fetches a chart page and turns it into a <c>SongList</c> titled
/// "&lt;current year&gt; - &lt;listName&gt;".
/// </summary>
/// <param name="listName">Chart name used in the list title.</param>
/// <param name="url">Address of the chart page.</param>
/// <returns>The populated list, or null when <c>Parse</c> returned no songs.</returns>
static SongList Download(string listName, string url)
{
    string title = DateTime.Now.Year + " - " + listName;
    Console.WriteLine("Downloading " + title);

    List<SongListSong> songs = Parse(title, DownloadHtml(url));
    if (songs == null)
    {
        return null;
    }

    return new SongList
    {
        Title = title,
        Songs = songs
    };
}
/// <summary>
/// Extracts chart entries from a chart page's HTML. Each
/// <c>ul.o-chart-results-list-row</c> element becomes one song, numbered from 1
/// in document order; title and artist come from the first list item in the row
/// that has both an <c>h3</c> and a <c>span</c>.
/// </summary>
/// <param name="name">Chart title, used only in console messages.</param>
/// <param name="html">Page markup; may be null or empty.</param>
/// <returns>
/// The parsed songs, or null when the markup is missing or contains no chart rows
/// (callers treat null as "nothing to store").
/// </returns>
static List<SongListSong> Parse(string name, string html)
{
    if (string.IsNullOrEmpty(html))
    {
        Console.WriteLine("No HTML to parse for " + name);
        return null;
    }

    var parser = new HtmlParser();
    var document = parser.Parse(html);

    var articles = document.QuerySelectorAll("ul.o-chart-results-list-row");
    if (articles.Length == 0)
    {
        // Previously the list stayed null here and the final count threw instead
        // of returning the null the caller checks for.
        Console.WriteLine("No chart rows found for " + name);
        return null;
    }

    Console.WriteLine("Found " + articles.Length + " Songs");
    var songs = new List<SongListSong>();

    var position = 1;
    foreach (var article in articles)
    {
        // Placeholders survive when a row has no item with both an h3 and a span.
        var title = "none";
        var artist = "none";

        var listItems = article.QuerySelectorAll("li.o-chart-results-list__item");
        foreach (var listItem in listItems)
        {
            var h3 = listItem.QuerySelectorAll("h3");
            var span = listItem.QuerySelectorAll("span");
            if (h3.Length > 0 && span.Length > 0)
            {
                title = h3[0].InnerHtml.Trim();
                artist = span[0].InnerHtml.Trim();
                break;
            }
        }

        var song = new SongListSong();
        song.Artist = artist;
        song.Title = title;
        song.Position = position;
        songs.Add(song);
        position++;
    }

    Console.Write("Parsed " + songs.Count + " Songs");
    return songs;
}
/// <summary>
/// Fetches the chart page and builds a <c>SongList</c> titled
/// "&lt;current year&gt; - &lt;listName&gt;" using <c>ParseHot100</c>.
/// </summary>
/// <param name="listName">Chart name used in the list title.</param>
/// <param name="url">Address of the chart page.</param>
/// <returns>The populated list, or null when <c>ParseHot100</c> returned no songs.</returns>
static SongList DownloadHot100(string listName, string url)
{
    string title = DateTime.Now.Year + " - " + listName;
    Console.WriteLine("Downloading " + title);

    List<SongListSong> songs = ParseHot100(title, DownloadHtml(url));
    if (songs == null)
    {
        return null;
    }

    return new SongList
    {
        Title = title,
        Songs = songs
    };
}
static List<SongListSong> ParseHot100(string name, string html)
{
List<SongListSong> songs = null;
var parser = new HtmlParser();
var document = parser.Parse(html);
//2-?
var cs = "chart-results-list=\"";
var ce = "data-icons=\"https:";
var ics = html.IndexOf(cs) + cs.Length;
var ice = html.IndexOf(ce);
var json = html.Substring(ics, ice - ics);
json = json.Replace("\"", "").Replace("&quot;", "\"").Replace("&quotquot;", "\"").Replace("&quoquot;;", "\"").Replace("&ququot;", "\"");
JArray articles = JArray.Parse(json);
if (articles.Count() > 0)
{
Console.WriteLine("Found " + articles.Count() + " Songs");
songs = new List<SongListSong>();
}
return candidates.ToList();
var i = 1;
foreach (var article in articles)
{
var title = (string)article["title"];
var artist = (string)article["artist_name"];
var song = new SongListSong();
song.Artist = WebUtility.HtmlDecode(artist);
song.Title = WebUtility.HtmlDecode(title);
song.Position = Convert.ToInt32(i);
songs.Add(song);
i++;
}
Console.Write("Parsed " + songs.Count() + " Songs");
return songs;
} }
static List<Song> GetArtistCandidates(string searchArtist, Dictionary<string, List<Song>> artistIndex)
static string DownloadHtml(string url)
{ {
string data = null; var candidates = new HashSet<Song>();
ServicePointManager.SecurityProtocol = SecurityProtocolType.Ssl3 | SecurityProtocolType.Tls | SecurityProtocolType.Tls11 | SecurityProtocolType.Tls12; var normalizedSearch = NormalizeString(searchArtist);
ServicePointManager.ServerCertificateValidationCallback += (sender, certificate, chain, errors) => true; var searchWords = normalizedSearch.Split(' ', (char)StringSplitOptions.RemoveEmptyEntries);
ServicePointManager.ServerCertificateValidationCallback = delegate { return true; };
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url); // Find songs that contain any of the search words
request.Method = "GET"; foreach (var word in searchWords)
HttpWebResponse response = (HttpWebResponse)request.GetResponse();
if (response.StatusCode == HttpStatusCode.OK)
{ {
Stream receiveStream = response.GetResponseStream(); if (word.Length >= 3 && artistIndex.ContainsKey(word))
StreamReader readStream = null;
if (response.CharacterSet == null)
{ {
readStream = new StreamReader(receiveStream); foreach (var song in artistIndex[word])
{
candidates.Add(song);
}
} }
else
// Also check prefixes
for (int i = 3; i <= Math.Min(8, word.Length); i++)
{ {
readStream = new StreamReader(receiveStream, Encoding.GetEncoding(response.CharacterSet)); var prefix = word.Substring(0, i);
if (artistIndex.ContainsKey(prefix))
{
foreach (var song in artistIndex[prefix])
{
candidates.Add(song);
}
}
} }
data = readStream.ReadToEnd();
response.Close();
readStream.Close();
} }
return data;
} return candidates.ToList();
}
static class StingExtension
{
public static string RemoveCrap(this String str)
{
string regex = "(\\[.*\\])|(\".*\")|('.*')|(\\(.*\\))";
return Regex.Replace(str, regex, "").ToLower().Replace("ft.", "").Replace("feat.", "").Replace("featured", "").Replace("featuring", "").Replace("'", "").Replace(" "," ").Trim();
} }
} }
} }