From c9cb751bb580030fa1f323e11641cd5095a505ea Mon Sep 17 00:00:00 2001 From: Matt Bruce Date: Fri, 14 Apr 2017 13:50:44 -0500 Subject: [PATCH] added found songs and updated search algo Signed-off-by: Matt Bruce --- BillboardPlaylistUpdater/Program.cs | 135 ++++++++++++++++++++++++---- Herse.Models/SongList.cs | 4 + 2 files changed, 122 insertions(+), 17 deletions(-) diff --git a/BillboardPlaylistUpdater/Program.cs b/BillboardPlaylistUpdater/Program.cs index 0f78257..a6fb8e5 100644 --- a/BillboardPlaylistUpdater/Program.cs +++ b/BillboardPlaylistUpdater/Program.cs @@ -1,4 +1,5 @@ using AngleSharp.Parser.Html; +using DuoVia.FuzzyStrings; using FireSharp.Config; using FireSharp.Interfaces; using Herse.Models; @@ -10,33 +11,73 @@ using System.IO; using System.Linq; using System.Net; using System.Text; +using System.Text.RegularExpressions; using System.Threading.Tasks; -using System.Net; namespace BillboardPlaylistUpdater { class Program { + static List songs = null; + static List songList = null; static void Main(string[] args) { + //args = new string[] { "mbrucedogs" }; + + if (args.Length != 1) + { + Console.WriteLine("usage: songcrawler partyid songspath"); + return; + } + string controller = args[0]; + IFirebaseConfig config = new FirebaseConfig { AuthSecret = ConfigurationManager.AppSettings["Firebase.Secret"], BasePath = ConfigurationManager.AppSettings["Firebase.Path"] }; FireSharp.FirebaseClient client = new FireSharp.FirebaseClient(config); + songs = client.Get(string.Format("controllers/{0}/songs", "mbrucedogs")).ResultAs>(); + string firepath = "songList"; Console.WriteLine("Loading current library"); - List songList = client.Get(firepath).ResultAs>(); + songList = client.Get(firepath).ResultAs>(); + if (songList != null) Console.WriteLine(string.Format("{0} songList loaded", songList.Count)); else songList = new List(); + RunTest(); + //update Shared Charts and save + UpdateCurrentCharts(); + client.Set(firepath, songList); + + //update Controller Charts for Local Search and save + UpdateSearchLists(); + client.Set(string.Format("controllers/{0}/songList", controller), songList); + } + + static void RunTest() + { + var testArtist = "Linkin Park Featuring Kiiara".RemoveCrap().ToLower(); + var testTitle = "Heavy"; + var psongs = songs.Where(s => s.Title.Contains(testTitle)).ToList(); + foreach (var item in psongs) + { + var ia = item.Artist.RemoveCrap(); + var it = item.Title.RemoveCrap(); + var artist = DoesMatch(ia, testArtist); + var title = DoesMatch(it, testTitle); + } + } + + static void UpdateCurrentCharts() + { SongList hot100 = Download("Hot 100", "http://www.billboard.com/charts/hot-100"); - SongList pop = Download("Pop-Songs","http://www.billboard.com/charts/pop-songs"); - SongList rock = Download("Rock-Songs","http://www.billboard.com/charts/rock-songs"); + SongList pop = Download("Pop-Songs", "http://www.billboard.com/charts/pop-songs"); + SongList rock = Download("Rock-Songs", "http://www.billboard.com/charts/rock-songs"); SongList country = Download("Country-Songs", "http://www.billboard.com/charts/country-songs"); SongList hiphop = Download("R-B-Hip-Hop-Songs", "http://www.billboard.com/charts/r-b-hip-hop-songs"); @@ -53,24 +94,71 @@ namespace BillboardPlaylistUpdater { Console.WriteLine(string.Format("Checking for {0}", sl.Title)); var found = songList.Where(s => s.Title.ToLower() == sl.Title.ToLower()); - if(found != null) + if (found != null) { var items = found.ToList(); foreach (var item in items) { - songList.Remove(item); + songList.Remove(item); } } songList.Add(sl); - + } catch (Exception ex) { Console.WriteLine(ex.Message); } } - client.Set(firepath, songList); + songList = songList.OrderByDescending(l => l.Title).ToList(); + } + + static void UpdateSearchLists() + { + //update the controller SongLists + foreach (var list in songList) + { + Console.WriteLine("********************************************************"); + Console.WriteLine(string.Format("Matching Controllers Songs for {0}", list.Title)); + Console.WriteLine("********************************************************"); + Search(list); + } + } + + static void Search(SongList list) + { + foreach (var song in list.Songs) + { + song.FoundSongs.Clear(); + var bA = song.Artist.RemoveCrap().ToLower(); + var bT = song.Title.RemoveCrap().ToLower(); + + foreach (var item in songs) + { + if (item.Artist != null && item.Title != null) + { + var t = item.Title.RemoveCrap().ToLower(); + var a = item.Artist.RemoveCrap().ToLower(); + bool titleMatch = DoesMatch(bT, t); + if (titleMatch && DoesMatch(bA, a)) + { + song.FoundSongs.Add(item); + } + } + } + Console.WriteLine("Found ({0}) Song:{1} - {2}", song.FoundSongs.Count(), song.Artist, song.Title); + + } + } + + static bool DoesMatch(string primary, string toMatch) + { + if (primary.Contains(toMatch) || toMatch.Contains(primary)) { return true; } + int diff = primary.LevenshteinDistance(toMatch); + int distance = 4; + if (toMatch.Length < 6) { distance = 2; } + return diff < distance; } static SongList Download(string listName, string url) @@ -99,7 +187,8 @@ namespace BillboardPlaylistUpdater var parser = new HtmlParser(); var document = parser.Parse(html); var articles = document.QuerySelectorAll("article.chart-row"); - if(articles.Count() > 0) { + if (articles.Count() > 0) + { Console.WriteLine("Found " + articles.Count() + " Songs"); songs = new List(); } @@ -113,14 +202,15 @@ namespace BillboardPlaylistUpdater { artist = article.QuerySelector("h3.chart-row__artist"); } - if(title != null && artist != null && position != null) + if (title != null && artist != null && position != null) { SongListSong song = new SongListSong(); - song.Artist = WebUtility.HtmlDecode(artist.InnerHtml.Trim().Replace("\n","")); - song.Title = WebUtility.HtmlDecode(title.InnerHtml.Trim().Replace("\n","")); + song.Artist = WebUtility.HtmlDecode(artist.InnerHtml.Trim().Replace("\n", "")); + song.Title = WebUtility.HtmlDecode(title.InnerHtml.Trim().Replace("\n", "")); song.Position = Int32.Parse(position.InnerHtml.Trim()); songs.Add(song); - } else + } + else { Console.WriteLine("couldn't find objects in " + title + " for Song #" + i); } @@ -135,14 +225,15 @@ namespace BillboardPlaylistUpdater string data = null; HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url); HttpWebResponse response = (HttpWebResponse)request.GetResponse(); - if(response.StatusCode == HttpStatusCode.OK) + if (response.StatusCode == HttpStatusCode.OK) { Stream receiveStream = response.GetResponseStream(); StreamReader readStream = null; - if(response.CharacterSet == null) + if (response.CharacterSet == null) { readStream = new StreamReader(receiveStream); - }else + } + else { readStream = new StreamReader(receiveStream, Encoding.GetEncoding(response.CharacterSet)); } @@ -152,6 +243,16 @@ namespace BillboardPlaylistUpdater readStream.Close(); } return data; - } + } + } + + static class StingExtension + { + public static string RemoveCrap(this String str) + { + string regex = "(\\[.*\\])|(\".*\")|('.*')|(\\(.*\\))"; + return Regex.Replace(str, regex, "").ToLower().Replace("ft.", "").Replace("feat.", "").Replace("featured", "").Replace("featuring", "").Replace("'", "").Replace(" "," ").Trim(); + } } } + diff --git a/Herse.Models/SongList.cs b/Herse.Models/SongList.cs index 1cb8eb1..a8f9583 100644 --- a/Herse.Models/SongList.cs +++ b/Herse.Models/SongList.cs @@ -11,6 +11,7 @@ namespace Herse.Models { [JsonProperty("title")] public string Title { get; set; } + [JsonProperty("songs")] public List Songs { get; set; } } @@ -24,5 +25,8 @@ namespace Herse.Models [JsonProperty("position")] public int Position { get; set; } + + [JsonProperty("foundSongs")] + public List FoundSongs { get; set; } = new List(); } }