diff --git a/Scripts/wikinext navbox and infobox parser to JSON.linq b/Scripts/wikinext navbox and infobox parser to JSON.linq new file mode 100644 index 0000000..265aab3 --- /dev/null +++ b/Scripts/wikinext navbox and infobox parser to JSON.linq @@ -0,0 +1,1637 @@ + + <RuntimeDirectory>\System.Web.dll + Newtonsoft.Json + Newtonsoft.Json.Linq + System.Collections.Specialized + System.Net + Newtonsoft.Json + System.Web + + +void Main() +{ + Util.AutoScrollResults = true; + + Console.Write(@" +This script uses a standard mediawiki navbox template to build JSON data for Flexbook. +Override string[] Categories to limit results for testing. + +Set the metadata variables for the template below. + +Complete: +* Tool item objects for basic tools +* Item name parsing for ignored names and wikilink syntax +====================================================================================== + "); + + // meta + OrderedDictionary meta = new OrderedDictionary(); + meta["author"] = "ClairelyClaire"; + meta["version"] = "0.1"; + meta["published"] = "1/4/2020"; + + int startCat = 0; + int subCount = Globals.Categories.Count() - startCat; + + // override startCat to start at category - count to category then subtract 1 + // override subCount to limit length, minimum size 1 + startCat = 19; + subCount = 1; + + string[] CategoryList = Globals.Categories.ToList().GetRange(startCat, subCount).ToArray(); + + // categories that need completely special processing + // Festivals, Locations, Seasons, SkillsStats + + OrderedDictionary sauce = new OrderedDictionary(); + + sauce["metadata"] = meta; + + List lstCategories = new List(); + + // create a new blank template + // OrderedDictionary template = new OrderedDictionary(); + //OrderedDictionary template = new OrderedDictionary(); + Dictionary template = new Dictionary(); + + // keyed template - helper for the template creator + SortedDictionary templateKeys = new SortedDictionary(); + + // every template must have a "name" parameter + 
template["name"] = "Name"; + + foreach (string strCategory in CategoryList) + { + Console.WriteLine("\n------------------------------\n" + strCategory + "\n------------------------------"); + + // this is the category name for the Navbox template on the wiki + string cat = strCategory; + + // remove spaces from Navbox template category name + if (strCategory.Contains(" ")) + cat = cat.Replace(" ",""); + + // return the Navbox wikitext + // this is what we have to parse to create subcategories and items + string wikitext = Wikidata(cat,"category"); + + // array of all lines in the wikitext; this is a wiki table so we can just split by newline + List source = wikitext.Split(new string[] { "\n" }, StringSplitOptions.None).ToList(); + + // set all our empty variables // + + // new array of lines to be processed by the data scraper + List lines = new List(); + + // holds the count of subcategories + // if nonzero, do special things + int rowcount = 0; + + // holds the previous line's value + string prev = ""; + + OrderedDictionary categorySauce = new OrderedDictionary(); + + // use Category, not cat - this is the display name + categorySauce["category"] = strCategory; + + List subcategorySauce = new List(); + + string currentSubcat = ""; + int currentSubI = -1; + + for (int i = 0; i < source.Count(); i++) + { + string line = source[i]; + + // if the line starts with a blacklisted string (item or subcategory), skip it + if (Globals.Avoid.Any(b => line.StartsWith(b))) + { + continue; + } + // this is a header, which means it's a subcategory + else if (line.StartsWith("!")) + { + // empty string for subcategory name + string subcat = ""; + // find rowspan for sub-subcategories + if (line.Contains("rowspan")) + { + // this means the given subcategory has its own children + // so get the subcategory name to prepend to the children + + // get the number of rows (sub-subcategories) + Match mt = Regex.Match(line, @"(?!\! 
rowspan=""{0,})[0-9]+(?=""{0,}.+)"); + + // set the number of rows (sub-subcategories) + rowcount = Convert.ToInt32(mt.Value); + + // now get the value of this line + mt = Regex.Match(line, @"(?<=\[\[).+(?=\]\])|(?<=\|)[A-Z][A-Za-z ]+"); + + prev = mt.Value.Trim(); + continue; + } + else + { + // this must be something worth processing + if (rowcount > 0 && prev != "") + { + // sub-subcategory + // example: Fishing - Bait + // this value is "Bait" + Match mt = Regex.Match(line, @"(?<=\[\[)(.+\||)(.+)(?=\]\])"); + + // blank string + + if (mt.Success) + { + string cur = mt.Groups[mt.Groups.Count - 1].Value; + + // create the new subcategory title + subcat = prev + " - " + cur; + } + else + { + mt = Regex.Match(line, @"(?!\|\s*)[A-Za-z ]+"); + + // create the new subcategory title + subcat = prev + " - " + mt.Value; + } + + // subtract subcategory count by 1 + rowcount--; + + if (rowcount <= 0) + { + rowcount = 0; + prev = ""; + } + } + else if (line.Contains("[[") && line.Contains("|")) + { + // ![[Wiki Link|Display Name]] + Match mt = Regex.Match(line, @"(?!.+\|\s*)[A-Za-z ]+"); + + subcat = mt.Groups[mt.Groups.Count - 1].Value.Trim(); + } + else if (line.Contains("[[")) + { + // ![[Wiki Link]] + Match mt = Regex.Match(line, @"(?<=\[\[).+(?=\]\])"); + + subcat = mt.Value.Trim(); + } + else if (line.Contains(cat) && !line.Contains("colspan")) + { + // subcategory equals category + // we know this isn't the navbox header row since there's no colspan + Match mt = Regex.Match(line, @"(?!\!.+\|\s*)[A-Za-z ]+"); + + subcat = mt.Value.Trim(); + } + else + { + + Match mt = Regex.Match(line, @"(?!.+\|\s*)[A-Za-z ]+"); + + subcat = mt.Value; + } + + if (subcat != "") + { + // create a new subcategory object and populate it with its name + OrderedDictionary sc = new OrderedDictionary(); + sc.Add("subcategory", subcat.Trim()); + subcategorySauce.Add(sc); + currentSubcat = subcat; + currentSubI++; + + lines.Add(subcat.Trim()); + } + } + } + else if (line.StartsWith("|")) + { + 
// if the property value is ever any of these, overwrite it with a blank string + string[] itemPropsBlacklist = { "", "|", "N/A", "\n" }; + + // THIS IS WHERE THE INFOBOX MAGIC HAPPENS + // pass mt.Value to the wiki API + Match mt = Regex.Match(line, @"(?<=\[\[).+?(?=\]\])"); + + List Items = new List(); + + while (mt.Success) + { + OrderedDictionary itemProps = new OrderedDictionary(); + + string val = mt.Value; + + if (!Globals.Avoid.Any(val.Contains)) + { + // if this is a party hat, do some special stuff + if (val.Contains("Party Hat")) + { + // split on the pipe + string[] props = val.Split(new string[] { "|" }, StringSplitOptions.None); + if (props[1] != "Party Hat") + { + itemProps["name"] = props[0]; + } + else + { + mt = mt.NextMatch(); + continue; + } + } + else if (val.Contains("|")) + { + // split on the pipe + string[] props = val.Split(new string[] { "|" }, StringSplitOptions.None); + itemProps["name"] = props[0]; + itemProps["display_name"] = props[1]; + } + else + { + itemProps["name"] = val; + } + + // get individual item infoboxes here + // check all values against itemPropsBlacklist + string strItem = itemProps["name"].ToString(); + + ItemBox(strItem, strCategory) + } + + // if itemProps is populated, add it to the Items list + if (itemProps.Count > 0) + { + Items.Add(itemProps); + lines.Add(val); + } + + mt = mt.NextMatch(); + } + + // if this is Tools > Basic, parse the tool pages separately + if (cat == "Tools" && subcategorySauce[currentSubI]["subcategory"].ToString() == "Basic") + { + ParseTools(ref itemPropsBlacklist, ref Items); + } + + // make sure items are alphabetized + Items.Sort((OrderedDictionary a, OrderedDictionary b) => a["name"].ToString().CompareTo(b["name"].ToString())); + + subcategorySauce[currentSubI]["items"] = Items; + } + else + { + // do nothing + } + categorySauce["subcategories"] = subcategorySauce; + + if (currentSubI >= 0 && currentSubI < subcategorySauce.Count()) + { + if (subcategorySauce[currentSubI]["items"] != 
null) + { + // subcategorySauce[currentSubI].Dump(); + } + } + } + + lstCategories.Add(categorySauce); + } + + // sort the template attributes? + List> template2 = template.ToList>(); + + template2.Sort((x,y) => x.Key.CompareTo(y.Key)); + + OrderedDictionary templateDict = new OrderedDictionary(); + + foreach (KeyValuePair kvp in template2) + { + templateDict[kvp.Key] = kvp.Value; + } + + string templateJSON = JsonConvert.SerializeObject(templateKeys, Newtonsoft.Json.Formatting.Indented); + + //templateJSON.Dump(); + + sauce["template"] = templateDict; + + sauce["content"] = new OrderedDictionary(); + + ((OrderedDictionary)sauce["content"])["categories"] = lstCategories; + + //sauce.Dump(); + + string jo = JsonConvert.SerializeObject(sauce, Newtonsoft.Json.Formatting.Indented); + + // save a new one if files already exist + // DirectoryInfo dataDir = new DirectoryInfo(@"F:\Projects\Flexbook\Test Data"); + string pcname = Environment.MachineName.ToLower(); + string drv = "C"; + + switch (pcname) + { + case "breve": + drv = "D"; + break; + case "zelda": + drv = "F"; + break; + } + string dir = drv + @":\Projects\Flexbook\Test Data"; + // dir = @"D:\Source\Flexbook\Test Data"; + + DirectoryInfo dataDir = new DirectoryInfo(dir); + FileInfo[] Files = dataDir.GetFiles("stardewsample-all*.json"); + + File.WriteAllText(dir + @"\stardewsample-all-" + Files.Count() + ".json", jo); + + // build the template key file + string jtk = JsonConvert.SerializeObject(templateKeys, Newtonsoft.Json.Formatting.Indented); + File.WriteAllText(dir + @"\stardewsample-all-" + Files.Count() + "-template-keys.json", jtk); +} + + +// needs to be a void with a ref to the existing item dictionary, to which items should be added +// also refs to template and templateKeys +OrderedDictionary ItemBox(string strItem, string strCategory) +{ + // skip if the node exists in our blacklist + if (!Globals.Avoid.Contains(strItem) && strItem != Globals.Categories[0]) + { + // get first section of item's wiki 
page + // this contains the infobox + // this returns JSON, not XML + string strWiki = Wikidata(strItem, "item"); + + // strip out the content between the starting {{ and the first line break + Regex rgx = new Regex("Infobox.*"); + string strRes = rgx.Replace(strWiki, ""); + + // strip out everything past the closing }} + // find index of closing }}, which is always preceded by a newline + string strRep2 = "\n}}"; + int intStart = strRes.IndexOf(strRep2); + + if (intStart >= 0) + { + // extra pipe in the replacement string gives us the right formatting for our object + strRes = strRes.Replace(strRes.Substring(intStart), "\n|}}"); + } + + // split the string by the leading | on each line + string[] strSep = { "\n|" }; + string[] strSplit = strRes.Split(strSep, System.StringSplitOptions.RemoveEmptyEntries); + + string[] strSplit2 = { "=" }; + + // instantiate new item, which contains the item name and a dynamic object containing its properties + OrderedDictionary itm = new OrderedDictionary(); + + OrderedDictionary ordProps = new OrderedDictionary(); + + ordProps["name"] = strItem; + + // String manipulation!! 
+ foreach (string s in strSplit) + { + string[] strElements = s.Split(strSplit2, 2, System.StringSplitOptions.RemoveEmptyEntries); + + char[] strTrim = { ' ', '\n' }; + + int i = 0; + + foreach (var s2 in strElements.ToList()) + { + string s3 = s2.TrimStart(strTrim); + string s4 = s3.TrimEnd(strTrim); + + strElements[i] = s4; + i++; + } + + if (strElements.Length > 1) + { + string propertyName = strElements[0]; + dynamic propertyValue = strElements[1]; + + //if (propertyName == "source") + if (new[] { + "source", "as", "dr", "md", "os", + "ingredients", "tingredients", "season", "produce", "drops", + "location", "occupants", "materials", "animals", + "favorites", "family", "friends", + "buff", "seed" }.Contains(propertyName)) + { + switch (propertyName) + { + case "source": + propertyName = "sources"; + break; + case "as": + propertyName = "wigglies"; + break; + case "dr": + propertyName = "reward"; + break; + case "md": + propertyName = "drops"; + break; + case "os": + propertyName = "othersrc"; + break; + } + + List sourceList = new List(); + string s2 = s; + + if (s2.Contains("[[") || s2.Contains("{{")) + { + parseSource(s2, "[[", "]]", ref sourceList); + parseSource(s2, "{{", "}}", ref sourceList); + } + else if (s2.Contains("=")) + { + sourceList.Add(propertyValue); + } + else + { + sourceList.Add(s2); + } + sourceList.Sort(); + + propertyValue = sourceList.ToArray(); + } + else if (propertyName == "profession") + { + // this is probably a comma-separated list + // it's used by the infobox to calculate prices + string[] list = (propertyValue as string).Split(new string[] { "," }, StringSplitOptions.None); + propertyValue = list; + } + else + { + propertyValue = parseString(propertyValue); + } + + // fix price key name + propertyName = propertyName == "price" ? 
"sellprice" : propertyName; + + + // strip double apostrophes from strings + if (propertyValue is string && propertyValue.Contains("''")) + { + propertyValue = (propertyValue as string).Replace("''", ""); + } + + //ordProps.Add(propertyName, propertyValue); + ordProps[propertyName] = propertyValue; + } + } + + // build description template + // {{Description|Prismatic%20Shard}} + string descTemplate = "{{Description|" + strItem + "}}"; + + // add a description + ordProps["description"] = Wikidata(descTemplate, "wikitext"); ; + + // this is where we can alphabetize the properties if we really want to + //itm.ItemProps = new SortedDictionary(dynProps); + itm = ordProps; + + // add profession property where necessary + if (itm["name"].ToString() == "Dinosaur Egg") + { + itm["profession"] = new string[] { "Rancher", "Artisan" }; + } + + // if it's a fish, add the right professions + if (strCategory == "Fish") + { + itm["profession"] = new string[] { "Fisher", "Angler" }; + } + + // calculate quality prices + if ((string)itm["quality"] == "true" || (string)itm["iridium"] == "true") + { + GetPrices(ref itm); + } + + // now we can do shit with the price based on profession + if (itm["profession"] != null && itm["sellprice"] != null) + { + // calculate price based on profession + string[] professions = (string[])itm["profession"]; + + foreach (string profession in professions) + { + GetPrices(ref itm, profession.ToLower()); + } + } + + // add a type of vegetable-p to hops, wheat, and tea leaves0 + if (new[] { "Hops", "Wheat", "Tea Leaves" }.Contains(itm["name"].ToString())) + { + itm["type"] = "vegetable-p"; + } + + // add a type of vegetable to corn + if (itm["name"].ToString() == "Corn") + { + itm["type"] = "vegetable"; + } + + // add a type of flower to members of Flowers + if (Globals.Flowers.Contains(itm["name"].ToString())) + { + itm["type"] = "flower"; + } + + string artPrice(object p) + { + string pInt = Math.Truncate(Convert.ToInt32(p) * 1.4).ToString(); + return 
pInt; + }; + + if (itm["type"] != null) + { + // get the base sell price + int sprice = Convert.ToInt32(itm["sellprice"]); + + // if it's a fruit or vegetable, calculate price of artisan good + // fruit + if (itm["type"].ToString() == "fruit") + { + // this can be made into jelly and wine + // wine is available in all qualities + // jelly = (1 * 2) + 50 + // wine = 1 * 3 + + // jelly + itm["price_jelly"] = ((sprice * 2) + 50).ToString(); + itm["price_jelly_artisan"] = artPrice(itm["price_jelly"]); + + // wine + GetPrices(ref itm, "wine_"); + } + // vegetable, vegetable-p + else if (itm["type"].ToString().StartsWith("vegetable")) + { + // this can be made into pickles and juice + // pickles = (1 * 2) + 50 + // juice = 1 * 2.25 + + // pickles + itm["price_pickles"] = ((sprice * 2) + 50).ToString(); + itm["price_pickles_artisan"] = artPrice(itm["price_pickles"]); + + if (itm["type"].ToString() == "vegetable") + { + itm["price_juice"] = ((sprice * 2) + 50).ToString(); + itm["price_juice_artisan"] = artPrice(itm["price_juice"]); + } + } + // flower + else if (itm["type"].ToString() == "flower") + { + // 100g + (base x 2) + itm["price_honey"] = (100 + (sprice * 2)).ToString(); + itm["price_honey_artisan"] = artPrice(itm["price_honey"]); + } + } + + // if there's at least one artisan good associated with this + // get that good's data and append it to the item + if (Globals.ArtisanGoods.Contains(itm["name"])) + { + // get the name of the artisan good from the dictionary + string ArtGoodName = Globals.ArtisanGoods[itm["name"].ToString()].ToString(); + + // add a property to the item for the name of the artisan good + itm["artisan_good"] = ArtGoodName; + + // generate the infobox data for the artisan good + // we'll take the prices from this + OrderedDictionary ArtGood = ItemBox(ArtGoodName, strCategory); + + // we can't easily filter an OrderedDictionary's keys + // convert list of keys to string array + string[] ArtGoodKeys = new string[ArtGood.Count]; + // copy all the 
keys to the array + ArtGood.Keys.CopyTo(ArtGoodKeys, 0); + + // filter for entities named "price_..." + IEnumerable AGKeys = ArtGoodKeys.Where(agk => agk.Contains("price_")); + + // for each key in the filtered list, retrieve the key-value pair from the infobox + foreach (string k in AGKeys) + { + int theValue = Convert.ToInt32(ArtGood[k]); + // this use the product name, we probably don't want his + string artName = (ArtGoodName.ToLower()).Replace(" ", "_") + "_"; + // set this part of the key to "product" instead + artName = "product_"; + string theKey = k.Replace("price_", "price_" + artName); + + itm[theKey] = theValue.ToString(); ; + } + } + + //itm.Dump(); + + // add to object array + // AllItems.Add(itm); + // JsonConvert.SerializeObject(ordProps, Newtonsoft.Json.Formatting.Indented).Dump(); + return ordProps; + } + else + { + return null; + } +} + +/// +/// Appends quality prices to item OrderedDictionary: GetPrices(ref item, string profession) // set profession to "wine_" for wine pricing +/// +void GetPrices(ref OrderedDictionary itm, string pro = null) +{ + // get the base price from the item + int price = Convert.ToInt32(itm["sellprice"]); + + // set the base multiplier + decimal multi = 1; + + // set the string prefix for key names + string pre = "price_"; + + // if a profession is defined, use its multiplier and add a prefix + if (pro != null) + { + if (pro == "wine_") + { + multi *= 3; + pre += pro; + } + else + { + multi *= Convert.ToDecimal(Globals.Professions[pro]); + pre += pro.ToLower() + "_"; + } + } + + // calculate prices and add key+value pairs to item + int i = 0; + foreach (DictionaryEntry qual in Globals.Quality) + { + string key = pre + i + "_" + qual.Key; + decimal newprice = price * Convert.ToDecimal(qual.Value) * multi; + itm[key] = Math.Truncate(newprice).ToString(); + i++; + } + + // calculate artisan prices for wine + if (pro == "wine_") + { + i = 0; + foreach (DictionaryEntry qual in Globals.Quality) + { + string key = pre + 
"artisan_" + i + "_" + qual.Key; + decimal bprice = Convert.ToDecimal(itm["price_wine_" + i + "_" + qual.Key]); + decimal newprice = bprice * Convert.ToDecimal(1.4); + itm[key] = Math.Truncate(newprice).ToString(); + i++; + } + } + + if ((string)itm["iridium"] != "true") + { + itm.Remove(pre + "3_iridium"); + } +} + +/// +/// Appends health and energy buffs to item OrderedDictionary: GetBuffs(ref item) +/// +void GetBuffs(ref OrderedDictionary itm) +{ + // get base edibility score + int basic = Convert.ToInt32(itm["edibility"]); + + // if <= -300, shit ain't edible + if (basic <= -300) + { + itm["edible"] = "false"; + } + else + { + itm["edible"] = "true"; + + // if > -300 AND if < 0 + if (basic > -300 && basic < 0) + { + foreach (DictionaryEntry m in Globals.BuffQuality) + { + string key = m.Key.ToString(); + int quality = Convert.ToInt32(m.Value); + + //now we do math for energy + int value = Convert.ToInt16(Math.Ceiling(basic * 2.5) + basic * quality); + + itm["energy_" + m.Value + "_" + key] = value; + } + } + else if (basic == 0) + { + // this shit's worthless, yo + itm["energy_base"] = 0; + + } + else if (basic > 0) + { + foreach (DictionaryEntry m in Globals.BuffQuality) + { + string key = m.Key.ToString(); + int quality = Convert.ToInt32(m.Value); + + // now we do math for energy + int evalue = Convert.ToInt16(Math.Truncate(Math.Ceiling(basic * 2.5) + basic * quality)); + int hvalue = Convert.ToInt16(Math.Truncate(evalue * 0.45)); + + itm["energy_" + m.Value + "_" + key] = evalue; + itm["health_" + m.Value + "_" + key] = hvalue; + } + } + else + { + // this is so wrong + // how the fuck did you even get here? 
+ } + } +} + +/// +/// Returns a string of wikitext by search string and query type +/// +String Wikidata(string strInput, string strType) +{ + string strURL = "https://stardewvalleywiki.com/mediawiki/api.php?action=query&prop=revisions&rvprop=content&format=json&rvsection=1&titles="; + + // create the URL based on the type + // set the XML node path + switch (strType) + { + case "item": + strURL = "http://stardewvalleywiki.com/mediawiki/api.php?action=query&prop=revisions&rvprop=content&format=json&rvsection=0&titles=" + strInput; + // "[query][pages][$pageid$][revisions][0][*]"; + break; + case "image": + strURL = "http://stardewvalleywiki.com/mediawiki/api.php?action=query&prop=imageinfo&iiprop=url&continue=&format=json&titles=File:" + strInput; + // "[query][pages][$pageid$][imageinfo][0][url]"; // this is the actual image file from the wiki + break; + case "category": + strURL = "https://stardewvalleywiki.com/mediawiki/api.php?action=query&prop=revisions&rvprop=content&format=json&rvsection=0&titles=Template:Navbox" + strInput; + // "[query][pages][$pageid$][revisions][0][*]"; + break; + case "wikitext": + strURL = "https://stardewvalleywiki.com/mediawiki/api.php?action=expandtemplates&prop=wikitext&format=json&text=" + strInput; + // "[expandtemplates][wikitext]"; + break; + default: + break; + } + + string raw = new WebClient().DownloadString(strURL); + + JObject obj = JObject.Parse(raw.ToString()); + + JObject objPage = obj; + string pageid = ""; + + if (strType != "wikitext") + { + objPage = (JObject)obj["query"]["pages"]; + + pageid = ""; + + foreach (JProperty p in objPage.Properties()) + { + if (pageid == "") + { + pageid = p.Name; + } + else + { + break; + } + } + } + + switch (strType) + { + case "item": + case "category": + return objPage[pageid]["revisions"][0]["*"].ToString(); + case "image": + return objPage[pageid]["imageinfo"][0]["url"].ToString(); + case "wikitext": + return objPage["expandtemplates"]["wikitext"].ToString(); + default: + return 
""; + } +} + +/// +/// Special parser for tool information +/// +void ParseTools(ref string[] blacklist, ref List items) +{ + // the master list of tools + List Tools = new List(); + + // tool groups + string[] strTools = { "Axes", "Hoes", "Pickaxes", "Trash Cans", "Watering Cans" }; + + foreach (string strTool in strTools) + { + // the URL to hit for information + string strURL = "https://stardewvalleywiki.com/mediawiki/api.php?action=query&prop=revisions&rvprop=content&format=json&rvsection=1&titles=" + strTool; + + // the raw JSON output from the API response + var toolRaw = new WebClient().DownloadString(strURL); + + // parse the output into a JSON object model + JObject toolObj = JObject.Parse(toolRaw.ToString()); + + // get the page element + JObject toolPage = (JObject)toolObj["query"]["pages"]; + + // we have to find the page ID, which is super annoying + string toolPageId = ""; + foreach (JProperty p in toolPage.Properties()) + { + if (toolPageId == "") + { + toolPageId = p.Name; + } + else + { + break; + } + } + + // ok now we have the page ID + // get the page content + string strInput = toolPage[toolPageId]["revisions"][0]["*"].ToString(); + + // if there's content, process it + if (strInput.Length > 0) + { + if (strInput.Contains("wikitable")) + { + // this is wikitable markup + // convert into objects + // find the column headers + // these are the object properties + Match headerMatches = Regex.Match(strInput, @"(?<=\n\!)(.+)"); + + List lstHeaders = new List(); + + // we need this information to know where the table content starts + int hIdx = 0; + int hLen = 0; + + // create a list of keys from wikitable headers + while (headerMatches.Success) + { + string val = headerMatches.Value.Trim().ToLower(); + + hLen = headerMatches.Length; + hIdx = headerMatches.Index; + + lstHeaders.Add(val == "improvements" ? 
"notes" : val); + + headerMatches = headerMatches.NextMatch(); + } + + // extract the table rows + // +4 for the \n|-\n at the beginning of this section + int startI = hIdx + hLen + 4; + int endI = strInput.Length - startI; + + // this is the wikitext for the table rows + string wikiRows = strInput.Substring(startI, endI).Trim(); + + // split the string by the wikitext markup for a new row: |- + List lstRows = wikiRows.Split(new string[] { "\n|-\n" }, StringSplitOptions.None).ToList(); + + foreach (string row in lstRows) + { + // split this into a list by regex + // why? because some random bitch on the stardew valley wiki refuses to allow a single-line syntax on the affected pages + // literally. + + // List rowcols = row.Split(new string[] { "\n|"}, StringSplitOptions.None).ToList(); + + List rowcols = Regex.Split(row, @"\n\|").ToList(); + + // empty array of column values for this row + List newcols = new List(); + + OrderedDictionary tool = new OrderedDictionary(); + + int toolIndex = 0; + + foreach (string rowcol in rowcols) + { + string outval = ""; + string newval = rowcol; + + if (rowcol.StartsWith("|")) + { + newval = rowcol.Substring(1); + } + else + { + newval = rowcol; + } + + if (!blacklist.Contains(rowcol)) + { + // we have a column entry to parse + if (newval.StartsWith("}")) + { + // this is the end of the table, so skip it + continue; + } + else if (newval.Contains("[")) + { + // this is a link of some kind + // extract all links matching regex + // if link has a pipe, split it + // if link is a file, use the image name as the "image" dictionary entry and discard anything after the first match + // if the link isn't a file, use the displayname (second match) + Match linkMts = Regex.Match(newval, @"(?<=\[\[).+(?=\]\])"); + + string tempval = ""; + + while (linkMts.Success) + { + // check for File: first + // then check for pipe without File: + string linktext = linkMts.Value; + + if (linktext.Contains("File:")) + { + Match fileMts = 
Regex.Match(linktext, @"(?<=File:)[\w .]+"); + tempval = newval.Replace(linktext, fileMts.Value); + } + else if (linkMts.Value.Contains("|")) + { + string[] linkProps = linkMts.Value.Split(new string[] { "|" }, StringSplitOptions.None); + if (linkProps[0].Contains("File")) + { + Match fileMts = Regex.Match(linkProps[0], @"(?<=File:).+"); + + // this is the image name + tempval = newval.Replace(linkProps[0], fileMts.Value); + } + else + { + tempval = newval.Replace(linkMts.Value, linkProps[1]); + } + } + linkMts = linkMts.NextMatch(); + } + + outval = tempval.Replace("[", "").Replace("]", ""); + } + else if (newval.Contains("{")) + { + // there's data to be extracted here + // see if it's a template + Match tplMts = Regex.Match(newval, @"(?<=\{\{).+(?=\}\})"); + + while (tplMts.Success) + { + // split the value by pipe + string[] splitVals = tplMts.Value.Split(new string[] { "|" }, StringSplitOptions.None); + + if (splitVals.Count() == 3) + { + // if array has three members, output like: Copper Bar (5) + outval = splitVals[1] + " (" + splitVals[2] + ")"; + } + else if (splitVals.Length == 2) + { + // if the array has two members, ouput the second + outval = splitVals[1]; + } + else + { + //this is wrong + } + + tplMts = tplMts.NextMatch(); + } + } + else + { + // use the value as-is + outval = newval; + } + } + else + { + // this is an empty column, use a space + outval = " "; + + } + + // clean up extraneous wikitext markup + outval = outval.Replace("'''", "").Replace("''", "").Replace("\n\n", "\n"); + + tool.Add(lstHeaders[toolIndex], outval); + + toolIndex++; + + newcols.Add(outval); + } + Console.WriteLine(tool["name"]); + tool.Dump(); + items.Add(tool); + } + } + } + } +} + +/// +/// Single-line string parser +/// +string parseString(string s) +{ + // the default output is the input string + string result = s; + + // replace all HTML entities with characters + result = HttpUtility.HtmlDecode(s); + + // if the string contains wikitext markup, parse it + // 
otherwise, return propertyValue + if (new string[] { "[[", "{{" }.Any(str => s.Contains(str))) + { + if (s.Contains("[[")) + { + // this string has one or more [[wikilinks]] + // extract all [[wikilinks]] + Match m = Regex.Match(s, @"\[\[(.+?)\]\]"); // .+? means lazy (ungreedy) matching + + while (m.Success) + { + // check for File: first + // then check for pipe without File: + string linktext = m.Value; + + if (linktext.Contains("File")) + { + // this link contains a file. do things. + if (linktext.Contains("Level")) + { + // [[File:Fishing Skill Icon.png|24px|link=]] [[Fishing]] Level 2 + // Fishing]] Level 2 + // retrieve whole wikilink and concatenate second and third values + Match mx = Regex.Match(linktext, @"\[\[File.+?\]\] \[\[(.+)\]\]( Level [0-9]+)", RegexOptions.IgnoreCase); + + // for every match, perform string replacement + while (mx.Success) + { + // Fishing Level 2 + string strOld = mx.Groups[1].Captures[0].Value; + string strNew = mx.Groups[2].Captures[0].Value; + + result = result.Replace(strOld, strNew); + + mx = mx.NextMatch(); + } + } + else + { + // [[File:Shirt001.png|center]] + // [[File:Axe.png]] + // retrieve whole wikilink, second capture group is the filename + Match mx = Regex.Match(linktext, @"\[\[File:([\w -_.]+).*?\]\]", RegexOptions.IgnoreCase); + + // for every match, perform string replacement + while (mx.Success) + { + // Shirt001.png + // Axe.png + string strOld = mx.Groups[0].Captures[0].Value; + string strNew = mx.Groups[1].Captures[0].Value; + + result = result.Replace(strOld, strNew); + + mx = mx.NextMatch(); + } + } + } + else + { + // [[The Mines]] + // [[Random Events#Meteorite|meteorite]] + // this is a regular ol' link + if (linktext.Contains("|")) + { + string[] linkProps = m.Value.Split(new string[] { "|" }, StringSplitOptions.None); + result = result.Replace(m.Value, linkProps[1]); + } + else + { + result = result.Replace(m.Value, m.Value.Substring(2, m.Value.Length - 3)); + } + } + + m = m.NextMatch(); + } + } + + 
if (s.Contains("{{")) + { + // this string has one or more {{wikitext templates}} + // extract all {{wikitext templates}} + + Match m = Regex.Match(s, @"\{\{(.+?)\}\}"); // .+? means lazy (ungreedy) matching + + while (m.Success) + { + // this is the raw wikitext of the match + string wikitext = m.Value; + + // this is the string minus the start and end brackets + string input = wikitext.Substring(2, wikitext.Length - 4); + + // return this if all else fails + result = input; + + // we can just split by the pipe for this - all {{wikitext templates}} use pipe as the delimiter + string[] segments = input.Split(new string[] { "|" }, StringSplitOptions.RemoveEmptyEntries); + + // for case-insensitive string.Contains() + string templateName = segments[0].ToLower(); + string littletext = wikitext.ToLower(); + + if (segments.Count() > 1) + { + if (templateName == "name") + { + + if (segments.Count() == 2) + { + // return second segment as-is + result = segments[1]; + } + else + { + // {{name|Mining|Level 9|class=inline}} + // everything past the third segment (e.g. 
50) can be ignored + if (littletext.Contains("level")) + { + // {{name|Farming|Level 3|image=Farming Skill Icon.png}} + // Farming Level 3 + // test with Equipment + result = segments[1] + " " + segments[2]; + } + else if (littletext.Contains("+")) + { + // {{name|Defense|+3} + // +3 Defense + // test with Clothing and Weapons + result = segments[2] + " " + segments[1]; + } + else + { + // {{name|Omni Geode|50|...}} + // Omni Geode {50) + result = segments[1] + " (" + segments[2] + ")"; + } + } + } + else if (templateName == "npc") + { + // {{NPC|Jodi|Mother}} + // Jodi (Mother) + if (segments.Count() > 2) + result = segments[1] + " (" + segments[2] + ")"; + else + result = segments[1]; + } + else if (templateName == "price") + { + // {{Price|30|Currency}} + // JOPK, Qi, Star Token (if third parameter is empty, this is regular gold) + result = segments[1]; + + if (segments.Count() > 2) + { + switch (segments[2]) + { + case "JOPK": + result += " tokens"; + break; + case "Qi": + result += " Qui coins"; + break; + case "Token": + result += " star tokens"; + break; + } + } + else + { + result += "g"; + } + } + else if (templateName == "description") + { + // {{Description|Wild Horseradish}} + // {{Description|Recipe|Lucky Lunch}} + + // this returns a description string + // use this API call: + // https://stardewvalleywiki.com/mediawiki/api.php?action=expandtemplates&prop=wikitext&format=json&text=... + // result["expandtemplates"]["wikitext"] + + result = Wikidata(wikitext, "wikitext"); + + } + else + { + // this is wrong + result += " ***BAD INPUT***"; + } + + m = m.NextMatch(); + } + } + } + } + + // clean

tags from result + result = result.Replace("

", @"\n"); + result = result.Replace("

", ""); + + return result; +} +

/// <summary>
/// String parser for delimited lists. Pulls every strStart...strEnd span out of
/// strInput, reduces each one to a display label and appends it to lstSauce.
/// </summary>
/// <param name="strInput">Raw wikitext of a single property line.</param>
/// <param name="strStart">Opening delimiter, e.g. "[[" or "{{".</param>
/// <param name="strEnd">Closing delimiter, e.g. "]]" or "}}".</param>
/// <param name="lstSauce">List that receives the extracted labels; created if null.</param>
static void parseSource(string strInput, string strStart, string strEnd, ref List<string> lstSauce)
{
    if (string.IsNullOrEmpty(strInput))
    {
        return;
    }

    if (lstSauce == null)
    {
        lstSauce = new List<string>();
    }

    // special case: this icon stands for a single fixed source, added at most once
    if (strInput.Contains("Abandoned House Icon.png") && !lstSauce.Contains("Hat Mouse"))
    {
        lstSauce.Add("Hat Mouse");
        return;
    }

    // an empty start marker matches at index 0 forever, so refuse empty delimiters
    if (string.IsNullOrEmpty(strStart) || string.IsNullOrEmpty(strEnd))
    {
        return;
    }

    // add blacklisted entries here
    List<string> avoid = new List<string>();

    // strip table-pipe templates and quote markup; ''' has to go before '',
    // otherwise bold markup leaves a single stray apostrophe behind
    strInput = strInput.Replace("{{!}}", "").Replace("'''", "").Replace("''", "");

    while (true)
    {
        strInput = strInput.Trim();

        int start = strInput.IndexOf(strStart, StringComparison.Ordinal);
        if (start < 0)
        {
            break;
        }

        // look for the end marker only AFTER the start marker; searching from index 0
        // produced a negative length whenever a stray end marker came first
        int bodyStart = start + strStart.Length;
        int close = strInput.IndexOf(strEnd, bodyStart, StringComparison.Ordinal);
        if (close < 0)
        {
            // unterminated span: nothing more can be extracted
            break;
        }

        // text between the delimiters, then drop the whole span from the input
        string sub = strInput.Substring(bodyStart, close - bodyStart);
        strInput = strInput.Remove(start, close + strEnd.Length - start);

        // don't include items that match the blacklist
        if (avoid.Any(sub.Contains))
        {
            continue;
        }

        //"name|Fishing|Level 2|image=Fishing Skill Icon.png"
        //"name|Bug Meat|1"
        if (sub.Contains("|"))
        {
            string[] parts = sub.Split(new[] { "|" }, StringSplitOptions.RemoveEmptyEntries);

            if (parts.Length < 2)
            {
                // e.g. "Name|" - there is no second segment to pick
                if (parts.Length == 1)
                {
                    sub = parts[0];
                }
            }
            else if (sub.Contains("Bundle"))
            {
                sub = parts[1] + " Bundle";
            }
            else if (strInput.Contains("ingredients"))
            {
                // note: this tests what is LEFT of the input after the span was removed
                sub = parts[1];
                if (parts.Length == 3)
                {
                    sub += " (" + parts[2] + ")";
                }
            }
            else if (strInput.Contains("recipe"))
            {
                // sub is data, not a format string
                Console.Write(sub + " | ");
            }
            else
            {
                sub = parts[1];
            }
        }

        lstSauce.Add(sub);
    }
}

+// function for parsing single line values +// this needs reworking +// a pile of regexes run all at once is hard to maintain +string parseStringOld(string propertyValue) +{ + if (propertyValue.Contains("Traveling Cart")) + { + //Util.Break(); // there's something funny + } + + // {{name=Skill|Level #|imge=...}} + Match m_new = Regex.Match(propertyValue, @"(?<={{name\|)[\w\s']+\|Level [0-9]+(?=\|.+)"); + + // [[File:...]] [[Skill]] Level # + Match m_old = Regex.Match(propertyValue, @"(?!\[\[File.+\]\] \[\[)\w+\]\] Level [0-9]+", RegexOptions.IgnoreCase); + + // {{NPC|...|...[[..]]}} + // {{NPC|...|...}} + Match m_npc = Regex.Match(propertyValue, @"(?<=\{\{NPC\|)[A-Za-z0-9 \-\+\|]+(?=\[\[|\}\})"); + + // {{description|...}} + Match m_desc = Regex.Match(propertyValue, @"(?<=\{\{description\|)[A-Za-z0-9 \-\+\|]+(?=|\}\})", RegexOptions.IgnoreCase); + + // clean up wikilink syntax + Match m_link = Regex.Match(propertyValue, @"(?<=\[\[).+?(?=\]\])"); // .+?
means lazy (ungreedy) matching + + // remove wikilink brackets + Match m_bckt = Regex.Match(propertyValue, @"(?<=\[\[)[\w\s']+(?=\]\])"); + + // {{name|Defense|+3}} + Match m_stats = Regex.Match(propertyValue, @"(?<=\{\{name\|)[\w\s']+\|\+[0-9]+(?=\}\})"); + + // {{price|15000}} + Match m_price = Regex.Match(propertyValue, @"(?<=\{\{price\|)[\w\s']+(?=\}\})"); + + // {{name|Omni Geode|50}} + Match m_quantity = Regex.Match(propertyValue, @"(?<=\{\{name\|)[\w\s']+\|\+[0-9]+(?=\}\})"); + + // {{name|...}} + Match m_curly = Regex.Match(propertyValue, @"(?<=\{\{name\|)[\w\s']+(?=\||\})"); + + string result = propertyValue; + + + + if (m_new.Success) + { + result = m_new.Value.Replace("|", " "); + } + else if (m_old.Success) + { + result = m_old.Value.Replace("]]", ""); + } + else if (m_npc.Success) + { + result = m_npc.Value.Replace("|", " - "); + if (result.Contains("+")) + { + result += "hearts"; + } + } + else if (m_desc.Success) + { + result = m_desc.Value; + } + else if (m_link.Success) + { + string tempval = propertyValue; + + while (m_link.Success) + { + // check for File: first + // then check for pipe without File: + string linktext = m_link.Value; + + if (linktext.Contains("File:")) + { + Match fileMts = Regex.Match(linktext, @"(?<=File:)[\w .]+"); + tempval = tempval.Replace(linktext, fileMts.Value); + } + else if (linktext.Contains("|")) + { + string[] linkProps = m_link.Value.Split(new string[] { "|" }, StringSplitOptions.None); + tempval = tempval.Replace(m_link.Value, linkProps[1]); + } + m_link = m_link.NextMatch(); + } + + result = tempval.Replace("[", "").Replace("]", ""); + } + else if (m_bckt.Success) + { + result = (propertyValue.Replace("[", "")).Replace("]", ""); + } + else if (m_stats.Success) + { + List stats = new List(); + + // there might be more than one + while (m_stats.Success) + { + // split value by pipe + // display as val[1] + " " + val[0] + string[] props = m_stats.Value.Split(new string[] { "|" }, StringSplitOptions.None); + 
stats.Add((props[1] + " " + props[0]).Replace("{", "").Replace("}", "")); + + m_stats = m_stats.NextMatch(); + } + + // output as multiline string + result = String.Join("\n", stats); + } + else if (m_price.Success) + { + string price = "{{price|" + m_price.Value + "}}"; + result = propertyValue.Replace(price, m_price.Value); + } + else if (m_quantity.Success) + { + + } + else if (m_curly.Success) + { + result = m_curly.Value; + } + + return result; +} +

/// <summary>
/// global constants for different functions
/// </summary>
public class Globals
{
    /// <summary>
    /// list of blacklisted items by name - these will be skipped.
    /// The navbox parser skips any line that starts with one of these strings.
    /// </summary>
    public static List<string> Avoid = new List<string>
    {
        // wikitext characters
        "<", "{", "|-", "|}",
        // ignored items
        "Brown Egg",
        "Animals#",
        "Stone Owl",
        "Panda Hat",
        // these tools are parsed separately
        "Axes",
        "Hoes",
        "Pickaxes",
        "Trash Cans",
        "Watering Cans", // this separator was missing, which made the initializer invalid
        // these are broken and need to be fixed
        "Coop",
        "Barn",
        "Haunted Skull",
        "Jellies and Pickles",
        "Shed",
        "Slime"
    };

    /// <summary>
    /// list of categories for navbox wikitext parsing (the trailing number is the array index)
    /// </summary>
    public static string[] Categories =
    {
        "Animals", // 0
        "Artifacts", // 1
        "Artisan Goods", // 2
        "Buildings", // 3
        "Clothing", // 4
        "Crop", // 5
        "Decor", // 6
        "Equipment", // 7
        "Fish", // 8
        "Foraging", // 9
        "Furniture", // 10
        "Ingredients", // 11
        "Lighting", // 12
        "Minerals", // 13
        "Monsters", // 14
        "Recipes", // 15
        "Resources", // 16
        "Seeds", // 17
        "Tree", // 18
        "Tools", // 19
        "Villagers", // 20
        "Warp Totems", // 21
        "Weapons" // 22
    };

    /// <summary>
    /// Price multiplier for each Quality rating
    /// </summary>
    public static OrderedDictionary Quality = new OrderedDictionary
    {
        { "base", 1 },
        { "silver", 1.25 },
        { "gold", 1.5 },
        { "iridium", 2 }
    };

    /// <summary>
    /// Per-quality number keyed by the same quality names as Quality
    /// </summary>
    public static OrderedDictionary BuffQuality = new OrderedDictionary
    {
        { "base", 0 },
        { "silver", 1 },
        { "gold", 2 },
        { "iridium", 4 }
    };

    /// <summary>
    /// Price multiplier for each Profession
    /// </summary>
    public static OrderedDictionary Professions = new OrderedDictionary
    {
        { "artisan", 1.4 },
        { "rancher", 1.2 },
        { "gemologist", 1.3 },
        { "tiller", 1.1 },
        { "blacksmith", 1.5 },
        { "forester", 1.25 },
        { "fisher", 1.25 },
        { "angler", 1.5 }
    };

    /// <summary>
    /// Produce-to-ArtisanGood relationship
    /// </summary>
    public static OrderedDictionary ArtisanGoods = new OrderedDictionary
    {
        { "Hops", "Pale Ale" },
        { "Wheat", "Beer" },
        { "Honey", "Mead" },
        { "Milk", "Cheese" },
        { "Large Milk", "Cheese" },
        { "Goat Milk", "Goat Cheese" },
        { "Large Goat Milk", "Goat Cheese" },
        { "Coffee Bean", "Coffee" },
        { "Tea Leaves", "Green Tea" },
        { "Wool", "Cloth" },
        { "Egg", "Mayonnaise" },
        { "Large Egg", "Mayonnaise" },
        { "Void Egg", "Void Mayonnaise" },
        { "Dinosaur Egg", "Dinosaur Mayonnaise" },
        { "Truffle", "Truffle Oil" },
        { "Corn", "Oil" },
        { "Sunflower", "Oil" },
        { "Sunflower Seeds", "Oil" },
        { "Sturgeon Roe", "Caviar" },
        { "Roe", "Aged Roe" },
    };

    /// <summary>
    /// List of flowers for honey
    /// </summary>
    public static List<string> Flowers = new List<string>
    {
        { "Blue Jazz" },
        { "Fairy Rose" },
        { "Poppy" },
        { "Summer Spangle" },
        { "Sunflower" },
        { "Tulip" }
    };
}
\ No newline at end of file diff --git a/StardewScraper/Program.cs b/StardewScraper/Program.cs index 04939c5..d3900e9 100644 --- a/StardewScraper/Program.cs +++ b/StardewScraper/Program.cs @@ -1,4 +1,13 @@ -using System; +using Newtonsoft.Json; +using Newtonsoft.Json.Linq; +using System; +using System.Collections; +using System.Collections.Generic; +using System.Collections.Specialized; +using System.Linq; +using System.Net; +using System.Text.RegularExpressions; +using System.Web; namespace StardewScraper { @@ -6,7 +15,987 @@ namespace StardewScraper { static void Main(string[] args) { - Console.WriteLine("Hello World!"); - } - } + Dictionary dataSet = new Dictionary(); + + foreach (string category in Globals.Categories) + { + + string[] itemList = { "Hops", "Grape" }; + + List
Items = new List(); + + // for each item in list of items - needs to be changed for JSON support + foreach (string Item in itemList) + { + String strItem = Item; + OrderedDictionary TheItem = ItemBox(strItem, category); + Items.Add(TheItem); + } + + //AllItems.Dump(); + //AllJson.Dump(); + dataSet[category] = Items; + } + + string allJson = JsonConvert.SerializeObject(dataSet, Newtonsoft.Json.Formatting.Indented); + } + + static OrderedDictionary ItemBox(string strItem, string Category) + { + // skip if the node exists in our blacklist + if (!Globals.avoid.Contains(strItem) && strItem != Globals.Categories[0]) + { + // get first section of item's wiki page + // this contains the infobox + // this returns JSON, not XML + string strWiki = Wikidata(strItem, "item"); + + // strip out the content between the starting {{ and the first line break + Regex rgx = new Regex("Infobox.*"); + string strRes = rgx.Replace(strWiki, ""); + + // strip out everything past the closing }} + // find index of closing }}, which is always preceded by a newline + string strRep2 = "\n}}"; + int intStart = strRes.IndexOf(strRep2); + + if (intStart >= 0) + { + // extra pipe in the replacement string gives us the right formatting for our object + strRes = strRes.Replace(strRes.Substring(intStart), "\n|}}"); + } + + // split the string by the leading | on each line + string[] strSep = { "\n|" }; + string[] strSplit = strRes.Split(strSep, System.StringSplitOptions.RemoveEmptyEntries); + + string[] strSplit2 = { "=" }; + + // instantiate new item, which contains the item name and a dynamic object containing its properties + OrderedDictionary itm = new OrderedDictionary(); + + OrderedDictionary ordProps = new OrderedDictionary(); + + ordProps["name"] = strItem; + + // String manipulation!! 
+ foreach (string s in strSplit) + { + string[] strElements = s.Split(strSplit2, 2, System.StringSplitOptions.RemoveEmptyEntries); + + char[] strTrim = { ' ', '\n' }; + + int i = 0; + + foreach (var s2 in strElements.ToList()) + { + string s3 = s2.TrimStart(strTrim); + string s4 = s3.TrimEnd(strTrim); + + strElements[i] = s4; + i++; + } + + if (strElements.Length > 1) + { + string propertyName = strElements[0]; + dynamic propertyValue = strElements[1]; + + //if (propertyName == "source") + if (new[] { + "source", "as", "dr", "md", "os", + "ingredients", "tingredients", "season", "produce", "drops", + "location", "occupants", "materials", "animals", + "favorites", "family", "friends", + "buff", "seed" }.Contains(propertyName)) + { + switch (propertyName) + { + case "source": + propertyName = "sources"; + break; + case "as": + propertyName = "wigglies"; + break; + case "dr": + propertyName = "reward"; + break; + case "md": + propertyName = "drops"; + break; + case "os": + propertyName = "othersrc"; + break; + } + + List sourceList = new List(); + string s2 = s; + + if (s2.Contains("[[") || s2.Contains("{{")) + { + parseSource(s2, "[[", "]]", ref sourceList); + parseSource(s2, "{{", "}}", ref sourceList); + } + else if (s2.Contains("=")) + { + sourceList.Add(propertyValue); + } + else + { + sourceList.Add(s2); + } + sourceList.Sort(); + + propertyValue = sourceList.ToArray(); + } + else if (propertyName == "profession") + { + // this is probably a comma-separated list + // it's used by the infobox to calculate prices + string[] list = (propertyValue as string).Split(new string[] { "," }, StringSplitOptions.None); + propertyValue = list; + } + else + { + propertyValue = parseString(propertyValue); + } + + // fix price key name + propertyName = propertyName == "price" ? 
"sellprice" : propertyName; + + + // strip double apostrophes from strings + if (propertyValue is string && propertyValue.Contains("''")) + { + propertyValue = (propertyValue as string).Replace("''", ""); + } + + //ordProps.Add(propertyName, propertyValue); + ordProps[propertyName] = propertyValue; + } + } + + // build description template + // {{Description|Prismatic%20Shard}} + string descTemplate = "{{Description|" + strItem + "}}"; + + // add a description + ordProps["description"] = Wikidata(descTemplate, "wikitext"); ; + + // this is where we can alphabetize the properties if we really want to + //itm.ItemProps = new SortedDictionary(dynProps); + itm = ordProps; + + // add profession property where necessary + if (itm["name"].ToString() == "Dinosaur Egg") + { + itm["profession"] = new string[] { "Rancher", "Artisan" }; + } + + // if it's a fish, add the right professions + if (Category == "Fish") + { + itm["profession"] = new string[] { "Fisher", "Angler" }; + } + + // calculate quality prices + if ((string)itm["quality"] == "true" || (string)itm["iridium"] == "true") + { + GetPrices(ref itm); + } + + // now we can do shit with the price based on profession + if (itm["profession"] != null && itm["sellprice"] != null) + { + // calculate price based on profession + string[] professions = (string[])itm["profession"]; + + foreach (string profession in professions) + { + GetPrices(ref itm, profession.ToLower()); + } + } + + // add a type of vegetable-p to hops, wheat, and tea leaves0 + if (new[] { "Hops", "Wheat", "Tea Leaves" }.Contains(itm["name"].ToString())) + { + itm["type"] = "vegetable-p"; + } + + // add a type of vegetable to corn + if (itm["name"].ToString() == "Corn") + { + itm["type"] = "vegetable"; + } + + // add a type of flower to members of Flowers + if (Globals.Flowers.Contains(itm["name"].ToString())) + { + itm["type"] = "flower"; + } + + string artPrice(object p) + { + string pInt = Math.Truncate(Convert.ToInt32(p) * 1.4).ToString(); + return 
pInt; + }; + + if (itm["type"] != null) + { + // get the base sell price + int sprice = Convert.ToInt32(itm["sellprice"]); + + // if it's a fruit or vegetable, calculate price of artisan good + // fruit + if (itm["type"].ToString() == "fruit") + { + // this can be made into jelly and wine + // wine is available in all qualities + // jelly = (1 * 2) + 50 + // wine = 1 * 3 + + // jelly + itm["price_jelly"] = ((sprice * 2) + 50).ToString(); + itm["price_jelly_artisan"] = artPrice(itm["price_jelly"]); + + // wine + GetPrices(ref itm, "wine_"); + } + // vegetable, vegetable-p + else if (itm["type"].ToString().StartsWith("vegetable")) + { + // this can be made into pickles and juice + // pickles = (1 * 2) + 50 + // juice = 1 * 2.25 + + // pickles + itm["price_pickles"] = ((sprice * 2) + 50).ToString(); + itm["price_pickles_artisan"] = artPrice(itm["price_pickles"]); + + if (itm["type"].ToString() == "vegetable") + { + itm["price_juice"] = ((sprice * 2) + 50).ToString(); + itm["price_juice_artisan"] = artPrice(itm["price_juice"]); + } + } + // flower + else if (itm["type"].ToString() == "flower") + { + // 100g + (base x 2) + itm["price_honey"] = (100 + (sprice * 2)).ToString(); + itm["price_honey_artisan"] = artPrice(itm["price_honey"]); + } + } + + // if there's at least one artisan good associated with this + // get that good's data and append it to the item + if (Globals.ArtisanGoods.Contains(itm["name"])) + { + // get the name of the artisan good from the dictionary + string ArtGoodName = Globals.ArtisanGoods[itm["name"].ToString()].ToString(); + + // add a property to the item for the name of the artisan good + itm["artisan_good"] = ArtGoodName; + + // generate the infobox data for the artisan good + // we'll take the prices from this + OrderedDictionary ArtGood = ItemBox(ArtGoodName, Category); + + // we can't easily filter an OrderedDictionary's keys + // convert list of keys to string array + string[] ArtGoodKeys = new string[ArtGood.Count]; + // copy all the keys 
to the array + ArtGood.Keys.CopyTo(ArtGoodKeys, 0); + + // filter for entities named "price_..." + IEnumerable AGKeys = ArtGoodKeys.Where(agk => agk.Contains("price_")); + + // for each key in the filtered list, retrieve the key-value pair from the infobox + foreach (string k in AGKeys) + { + int theValue = Convert.ToInt32(ArtGood[k]); + // this use the product name, we probably don't want his + string artName = (ArtGoodName.ToLower()).Replace(" ", "_") + "_"; + // set this part of the key to "product" instead + artName = "product_"; + string theKey = k.Replace("price_", "price_" + artName); + + itm[theKey] = theValue.ToString(); ; + } + } + + //itm.Dump(); + + // add to object array + // AllItems.Add(itm); + // JsonConvert.SerializeObject(ordProps, Newtonsoft.Json.Formatting.Indented).Dump(); + return ordProps; + } + else + { + return null; + } + } + + /// + /// Appends quality prices to item OrderedDictionary: GetPrices(ref item, string profession) // set profession to "wine_" for wine pricing + /// + static void GetPrices(ref OrderedDictionary itm, string pro = null) + { + // get the base price from the item + int price = Convert.ToInt32(itm["sellprice"]); + + // set the base multiplier + decimal multi = 1; + + // set the string prefix for key names + string pre = "price_"; + + // if a profession is defined, use its multiplier and add a prefix + if (pro != null) + { + if (pro == "wine_") + { + multi *= 3; + pre += pro; + } + else + { + multi *= Convert.ToDecimal(Globals.Professions[pro]); + pre += pro.ToLower() + "_"; + } + } + + // calculate prices and add key+value pairs to item + int i = 0; + foreach (DictionaryEntry qual in Globals.Quality) + { + string key = pre + i + "_" + qual.Key; + decimal newprice = price * Convert.ToDecimal(qual.Value) * multi; + itm[key] = Math.Truncate(newprice).ToString(); + i++; + } + + // calculate artisan prices for wine + if (pro == "wine_") + { + i = 0; + foreach (DictionaryEntry qual in Globals.Quality) + { + string key = pre 
+ "artisan_" + i + "_" + qual.Key; + decimal bprice = Convert.ToDecimal(itm["price_wine_" + i + "_" + qual.Key]); + decimal newprice = bprice * Convert.ToDecimal(1.4); + itm[key] = Math.Truncate(newprice).ToString(); + i++; + } + } + + if ((string)itm["iridium"] != "true") + { + itm.Remove(pre + "3_iridium"); + } + } + + /// + /// Appends health and energy buffs to item OrderedDictionary: GetBuffs(ref item) + /// + static void GetBuffs(ref OrderedDictionary itm) + { + // get base edibility score + int basic = Convert.ToInt32(itm["edibility"]); + + // if <= -300, shit ain't edible + if (basic <= -300) + { + itm["edible"] = "false"; + } + else + { + itm["edible"] = "true"; + + // if > -300 AND if < 0 + if (basic > -300 && basic < 0) + { + foreach (DictionaryEntry m in Globals.BuffQuality) + { + string key = m.Key.ToString(); + int quality = Convert.ToInt32(m.Value); + + //now we do math for energy + int value = Convert.ToInt16(Math.Ceiling(basic * 2.5) + basic * quality); + + itm["energy_" + m.Value + "_" + key] = value; + } + } + else if (basic == 0) + { + // this shit's worthless, yo + itm["energy_base"] = 0; + + } + else if (basic > 0) + { + foreach (DictionaryEntry m in Globals.BuffQuality) + { + string key = m.Key.ToString(); + int quality = Convert.ToInt32(m.Value); + + // now we do math for energy + int evalue = Convert.ToInt16(Math.Truncate(Math.Ceiling(basic * 2.5) + basic * quality)); + int hvalue = Convert.ToInt16(Math.Truncate(evalue * 0.45)); + + itm["energy_" + m.Value + "_" + key] = evalue; + itm["health_" + m.Value + "_" + key] = hvalue; + } + } + else + { + // this is so wrong + // how the fuck did you even get here? 
+ } + } + } +

        /// <summary>
        /// function for returning wikitext from an API.
        /// Builds a MediaWiki api.php request for strInput, downloads the JSON reply and
        /// returns the one string the given request type is after.
        /// </summary>
        /// <param name="strInput">
        /// Unencoded page title, file name, navbox category or wikitext; it is URL-escaped here.
        /// </param>
        /// <param name="strType">"item", "image", "category" or "wikitext".</param>
        /// <returns>
        /// The requested text or URL; an empty string for an unknown strType or when the
        /// reply does not contain the expected node.
        /// </returns>
        static string Wikidata(string strInput, string strType)
        {
            const string api = "https://stardewvalleywiki.com/mediawiki/api.php";

            // escape the caller's text so '&', '#', '+', '{' and friends cannot
            // cut the query string short or be read as extra parameters
            string arg = Uri.EscapeDataString(strInput ?? string.Empty);

            // create the URL based on the type
            string strURL;
            switch (strType)
            {
                case "item":
                    // [query][pages][$pageid$][revisions][0][*]
                    strURL = api + "?action=query&prop=revisions&rvprop=content&format=json&rvsection=0&titles=" + arg;
                    break;
                case "image":
                    // [query][pages][$pageid$][imageinfo][0][url] - the actual image file from the wiki
                    strURL = api + "?action=query&prop=imageinfo&iiprop=url&continue=&format=json&titles=File:" + arg;
                    break;
                case "category":
                    // [query][pages][$pageid$][revisions][0][*]
                    strURL = api + "?action=query&prop=revisions&rvprop=content&format=json&rvsection=0&titles=Template:Navbox" + arg;
                    break;
                case "wikitext":
                    // [expandtemplates][wikitext]
                    strURL = api + "?action=expandtemplates&prop=wikitext&format=json&text=" + arg;
                    break;
                default:
                    // unknown request type: the answer is always empty, so skip the network call
                    return "";
            }

            string raw;
            using (WebClient client = new WebClient())
            {
                raw = client.DownloadString(strURL);
            }

            JObject obj = JObject.Parse(raw);

            if (strType == "wikitext")
            {
                return obj["expandtemplates"]?["wikitext"]?.ToString() ?? "";
            }

            // the reply is keyed by page id; take the first (only requested) page
            JObject pages = obj["query"]?["pages"] as JObject;
            JToken page = pages?.Properties().FirstOrDefault()?.Value;
            if (page == null)
            {
                return "";
            }

            JToken found = strType == "image"
                ? page["imageinfo"]?.First?["url"]
                : page["revisions"]?.First?["*"];

            return found?.ToString() ?? "";
        }
// function for parsing strings + static string parseString(string s) + { + // the default output is the input string + string result = s; + + // replace all HTML entities with characters + result = HttpUtility.HtmlDecode(s); + + // if the string contains wikitext markup, parse it + // otherwise, return propertyValue + if (new string[] { "[[", "{{" }.Any(str => s.Contains(str))) + { + if (s.Contains("[[")) + { + // this string has one or more [[wikilinks]] + // extract all [[wikilinks]] + Match m = Regex.Match(s, @"\[\[(.+?)\]\]"); // .+? means lazy (ungreedy) matching + + while (m.Success) + { + // check for File: first + // then check for pipe without File: + string linktext = m.Value; + + if (linktext.Contains("File")) + { + // this link contains a file. do things. + if (linktext.Contains("Level")) + { + // [[File:Fishing Skill Icon.png|24px|link=]] [[Fishing]] Level 2 + // Fishing]] Level 2 + // retrieve whole wikilink and concatenate second and third values + Match mx = Regex.Match(linktext, @"\[\[File.+?\]\] \[\[(.+)\]\]( Level [0-9]+)", RegexOptions.IgnoreCase); + + // for every match, perform string replacement + while (mx.Success) + { + // Fishing Level 2 + string strOld = mx.Groups[1].Captures[0].Value; + string strNew = mx.Groups[2].Captures[0].Value; + + result = result.Replace(strOld, strNew); + + mx = mx.NextMatch(); + } + } + else + { + // [[File:Shirt001.png|center]] + // [[File:Axe.png]] + // retrieve whole wikilink, second capture group is the filename + Match mx = Regex.Match(linktext, @"\[\[File:([\w -_.]+).*?\]\]", RegexOptions.IgnoreCase); + + // for every match, perform string replacement + while (mx.Success) + { + // Shirt001.png + // Axe.png + string strOld = mx.Groups[0].Captures[0].Value; + string strNew = mx.Groups[1].Captures[0].Value; + + result = result.Replace(strOld, strNew); + + mx = mx.NextMatch(); + } + } + } + else + { + // [[The Mines]] + // [[Random Events#Meteorite|meteorite]] + // this is a regular ol' link + if 
(linktext.Contains("|")) + { + string[] linkProps = m.Value.Split(new string[] { "|" }, StringSplitOptions.None); + result = result.Replace(m.Value, linkProps[1]); + } + else + { + result = result.Replace(m.Value, m.Value.Substring(2, m.Value.Length - 3)); + } + } + + m = m.NextMatch(); + } + } + + if (s.Contains("{{")) + { + // this string has one or more {{wikitext templates}} + // extract all {{wikitext templates}} + + Match m = Regex.Match(s, @"\{\{(.+?)\}\}"); // .+? means lazy (ungreedy) matching + + while (m.Success) + { + // this is the raw wikitext of the match + string wikitext = m.Value; + + // this is the string minus the start and end brackets + string input = wikitext.Substring(2, wikitext.Length - 4); + + // return this if all else fails + result = input; + + // we can just split by the pipe for this - all {{wikitext templates}} use pipe as the delimiter + string[] segments = input.Split(new string[] { "|" }, StringSplitOptions.RemoveEmptyEntries); + + // for case-insensitive string.Contains() + string templateName = segments[0].ToLower(); + string littletext = wikitext.ToLower(); + + if (segments.Count() > 1) + { + if (templateName == "name") + { + + if (segments.Count() == 2) + { + // return second segment as-is + result = segments[1]; + } + else + { + // {{name|Mining|Level 9|class=inline}} + // everything past the third segment (e.g. 
50) can be ignored + if (littletext.Contains("level")) + { + // {{name|Farming|Level 3|image=Farming Skill Icon.png}} + // Farming Level 3 + // test with Equipment + result = segments[1] + " " + segments[2]; + } + else if (littletext.Contains("+")) + { + // {{name|Defense|+3} + // +3 Defense + // test with Clothing and Weapons + result = segments[2] + " " + segments[1]; + } + else + { + // {{name|Omni Geode|50|...}} + // Omni Geode {50) + result = segments[1] + " (" + segments[2] + ")"; + } + } + } + else if (templateName == "npc") + { + // {{NPC|Jodi|Mother}} + // Jodi (Mother) + if (segments.Count() > 2) + result = segments[1] + " (" + segments[2] + ")"; + else + result = segments[1]; + } + else if (templateName == "price") + { + // {{Price|30|Currency}} + // JOPK, Qi, Star Token (if third parameter is empty, this is regular gold) + result = segments[1]; + + if (segments.Count() > 2) + { + switch (segments[2]) + { + case "JOPK": + result += " tokens"; + break; + case "Qi": + result += " Qui coins"; + break; + case "Token": + result += " star tokens"; + break; + } + } + else + { + result += "g"; + } + } + else if (templateName == "description") + { + // {{Description|Wild Horseradish}} + // {{Description|Recipe|Lucky Lunch}} + + // this returns a description string + // use this API call: + // https://stardewvalleywiki.com/mediawiki/api.php?action=expandtemplates&prop=wikitext&format=json&text=... + // result["expandtemplates"]["wikitext"] + + result = Wikidata(wikitext, "wikitext"); + + } + else + { + // this is wrong + result += " ***BAD INPUT***"; + } + + m = m.NextMatch(); + } + } + } + } + + // clean

tags from result + result = result.Replace("

", @"\n"); + result = result.Replace("

", ""); + + return result; + } + + // function for parsing single line values + static string parseStringOld(string propertyValue) + { + // {{name=Skill|Level #|imge=...}} + string regex_new = @"(?<={{name\|)\w+\|Level [0-9]+(?=\|.+)"; + Regex rgx_new = new Regex(regex_new, RegexOptions.IgnoreCase); + Match m_new = rgx_new.Match(propertyValue); + + // [[File:...]] [[Skill]] Level # + string regex_old = @"(?!\[\[File.+\]\] \[\[)\w+\]\] Level [0-9]+"; + Regex rgx_old = new Regex(regex_old, RegexOptions.IgnoreCase); + Match m_old = rgx_old.Match(propertyValue); + + // {{NPC|...|...[[..]]}} + // {{NPC|...|...}} + string regex_npc = @"(?<=\{\{NPC\|)[A-Za-z0-9 \-\+\|]+(?=\[\[|\}\})"; + Regex rgx_npc = new Regex(regex_npc, RegexOptions.IgnoreCase); + Match m_npc = rgx_npc.Match(propertyValue); + + // {{description|...}} + string regex_desc = @"(?<=\{\{description\|)[A-Za-z0-9 \-\+\|]+(?=|\}\})"; + Regex rgx_desc = new Regex(regex_desc, RegexOptions.IgnoreCase); + Match m_desc = rgx_desc.Match(propertyValue); + + // clean up wikilink syntax + string regex_link = @"\[\[.+\|(\w+)\]\]"; + Regex rgx_link = new Regex(regex_link, RegexOptions.IgnoreCase); + Match m_link = rgx_link.Match(propertyValue); + + // remove wikilink brackets + string regex_bckt = @"(?<=\[\[)[\w\s']+(?=\]\])"; + Regex rgx_bckt = new Regex(regex_bckt, RegexOptions.IgnoreCase); + Match m_bckt = rgx_bckt.Match(propertyValue); + + string result = propertyValue; + + if (m_new.Success) + { + result = m_new.Value.Replace("|", " "); + } + else if (m_old.Success) + { + result = m_old.Value.Replace("]]", ""); + } + else if (m_npc.Success) + { + result = m_npc.Value.Replace("|", " - "); + if (result.Contains("+")) + { + result += "hearts"; + } + } + else if (m_desc.Success) + { + result = m_desc.Value; + } + else if (m_link.Success) + { + result = propertyValue.Replace(m_link.Groups[0].Value, m_link.Groups[1].Value); + } + else if (m_bckt.Success) + { + result = (propertyValue.Replace("[", "")).Replace("]", ""); + 
} + + return result; + } +

        /// <summary>
        /// function for parsing lists into arrays.
        /// Pulls every strStart...strEnd span out of strInput, reduces each one to a
        /// display label and appends it to lstSauce.
        /// </summary>
        /// <param name="strInput">Raw wikitext of a single infobox property.</param>
        /// <param name="strStart">Opening delimiter, e.g. "[[" or "{{".</param>
        /// <param name="strEnd">Closing delimiter, e.g. "]]" or "}}".</param>
        /// <param name="lstSauce">List that receives the extracted labels; created if null.</param>
        static void parseSource(string strInput, string strStart, string strEnd, ref List<string> lstSauce)
        {
            // an empty start marker matches at index 0 forever, so refuse empty delimiters
            if (string.IsNullOrEmpty(strInput) || string.IsNullOrEmpty(strStart) || string.IsNullOrEmpty(strEnd))
            {
                return;
            }

            if (lstSauce == null)
            {
                lstSauce = new List<string>();
            }

            // add blacklisted entries here
            List<string> avoid = new List<string>();

            // strip the table-pipe template, which would otherwise be read as a span
            strInput = strInput.Replace("{{!}}", "");

            while (true)
            {
                strInput = strInput.Trim();

                int start = strInput.IndexOf(strStart, StringComparison.Ordinal);
                if (start < 0)
                {
                    break;
                }

                // look for the end marker only AFTER the start marker; searching from index 0
                // produced a negative length whenever a stray end marker came first
                int bodyStart = start + strStart.Length;
                int close = strInput.IndexOf(strEnd, bodyStart, StringComparison.Ordinal);
                if (close < 0)
                {
                    // unterminated span: nothing more can be extracted
                    break;
                }

                // text between the delimiters, then drop the whole span from the input
                string sub = strInput.Substring(bodyStart, close - bodyStart);
                strInput = strInput.Remove(start, close + strEnd.Length - start);

                // don't include items that match the blacklist
                if (avoid.Any(sub.Contains))
                {
                    continue;
                }

                //"name|Fishing|Level 2|image=Fishing Skill Icon.png"
                //"name|Bug Meat|1"
                if (sub.Contains("|"))
                {
                    string[] parts = sub.Split(new[] { "|" }, StringSplitOptions.RemoveEmptyEntries);

                    if (parts.Length < 2)
                    {
                        // e.g. "Name|" - there is no second segment to pick
                        if (parts.Length == 1)
                        {
                            sub = parts[0];
                        }
                    }
                    else if (sub.Contains("Bundle"))
                    {
                        sub = parts[1] + " Bundle";
                    }
                    else if (strInput.Contains("ingredients"))
                    {
                        // note: this tests what is LEFT of the input after the span was removed;
                        // the quantity segment is optional, so only append it when present
                        sub = parts[1];
                        if (parts.Length > 2)
                        {
                            sub += " (" + parts[2] + ")";
                        }
                    }
                    else if (strInput.Contains("recipe"))
                    {
                        // sub is data, not a format string
                        Console.Write(sub + " | ");
                    }
                    else
                    {
                        sub = parts[1];
                    }
                }

                lstSauce.Add(sub);
            }
        }

        /// <summary>
        /// global constants for different functions
        /// </summary>
        public class Globals
        {
            /// <summary>
            /// list of blacklisted items by name - these will be skipped
            /// </summary>
            public static List<string> avoid = new List<string>
            {
                "Axes",
                "Hoes",
                "Pickaxes",
                "Trash Cans",
                "Watering Cans"
            };

            /// <summary>
            /// list of categories for navbox wikitext parsing (the trailing number is the array index)
            /// </summary>
            public static string[] Categories =
            {
                "Animals", // 0
                "Artifacts", // 1
                "Artisan Goods", // 2
                "Buildings", // 3
                "Clothing", // 4
                "Crop", // 5
                "Decor", // 6
                "Equipment", // 7
                "Fish", // 8
                "Foraging", // 9
                "Furniture", // 10
                "Ingredients", // 11
                "Lighting", // 12
                "Minerals", // 13
                "Monsters", // 14
                "Recipes", // 15
                "Resources", // 16
                "Seeds", // 17
                "Tree", // 18
                "Tools", // 19
                "Villagers", // 20
                "Warp Totems", // 21
                "Weapons" // 22
            };

            /// <summary>
            /// Price multiplier for each Quality rating
            /// </summary>
            public static OrderedDictionary Quality = new OrderedDictionary
            {
                { "base", 1 },
                { "silver", 1.25 },
                { "gold", 1.5 },
                { "iridium", 2 }
            };

            /// <summary>
            /// Per-quality number keyed by the same quality names as Quality
            /// </summary>
            public static OrderedDictionary BuffQuality = new OrderedDictionary
            {
                { "base", 0 },
                { "silver", 1 },
                { "gold", 2 },
                { "iridium", 4 }
            };

            /// <summary>
            /// Price multiplier for each Profession
            /// </summary>
            public static OrderedDictionary Professions = new OrderedDictionary
            {
                { "artisan", 1.4 },
                { "rancher", 1.2 },
                { "gemologist", 1.3 },
                { "tiller", 1.1 },
                { "blacksmith", 1.5 },
                { "forester", 1.25 },
                { "fisher", 1.25 },
                { "angler", 1.5 }
            };

            /// <summary>
            /// Produce-to-ArtisanGood relationship
            /// </summary>
            public static OrderedDictionary ArtisanGoods = new OrderedDictionary
            {
                { "Hops", "Pale Ale" },
                { "Wheat", "Beer" },
                { "Honey", "Mead" },
                { "Milk", "Cheese" },
                { "Large Milk", "Cheese" },
                { "Goat Milk", "Goat Cheese" },
                { "Large Goat Milk", "Goat Cheese" },
                { "Coffee Bean", "Coffee" },
                { "Tea Leaves", "Green Tea" },
                { "Wool", "Cloth" },
                { "Egg", "Mayonnaise" },
                { "Large Egg", "Mayonnaise" },
                { "Void Egg", "Void Mayonnaise" },
                { "Dinosaur Egg", "Dinosaur Mayonnaise" },
                { "Truffle", "Truffle Oil" },
                { "Corn", "Oil" },
                { "Sunflower", "Oil" },
                { "Sunflower Seeds", "Oil" },
                { "Sturgeon Roe", "Caviar" },
                { "Roe", "Aged Roe" },
            };

            /// <summary>
            /// List of flowers for honey
            /// </summary>
            public static List<string> Flowers = new List<string>
            {
                { "Blue Jazz" },
                { "Fairy Rose" },
                { "Poppy" },
                { "Summer Spangle" },
                { "Sunflower" },
                { "Tulip" }
            };
        }
+ } } diff --git a/StardewScraper/StardewScraper.csproj b/StardewScraper/StardewScraper.csproj index c73e0d1..0eaee1b 100644 --- a/StardewScraper/StardewScraper.csproj +++ b/StardewScraper/StardewScraper.csproj @@ -5,4 +5,8 @@ netcoreapp3.1 + + + +