<RuntimeDirectory>\System.Web.dll Newtonsoft.Json Newtonsoft.Json.Linq System.Collections.Specialized System.Net Newtonsoft.Json System.Web
/// <summary>
/// Script entry point. For every selected entry of Globals.Categories it downloads the
/// matching wiki Navbox template, turns header rows ("!") into subcategories and item
/// rows ("|") into item objects, and collects one category object per Navbox.
/// The collected data is serialized by the statements that follow the category loop.
/// </summary>
void Main()
{
    Util.AutoScrollResults = true;
    Console.Write(@" This script uses a standard mediawiki navbox template to build JSON data for Flexbook. Override string[] Categories to limit results for testing. Set the metadata variables for the template below. Complete: * Tool item objects for basic tools * Item name parsing for ignored names and wikilink syntax ====================================================================================== ");

    // meta
    OrderedDictionary meta = new OrderedDictionary();
    meta["author"] = "ClairelyClaire";
    meta["version"] = "0.1";
    meta["published"] = "1/4/2020";

    int startCat = 0;
    int subCount = Globals.Categories.Count() - startCat;
    // override startCat to start at category - count to category then subtract 1
    // override subCount to limit length, minimum size 1
    startCat = 19;
    subCount = 1;
    string[] CategoryList = Globals.Categories.ToList().GetRange(startCat, subCount).ToArray();

    // categories that need completely special processing
    // Festivals, Locations, Seasons, SkillsStats
    OrderedDictionary sauce = new OrderedDictionary();
    sauce["metadata"] = meta;
    List<OrderedDictionary> lstCategories = new List<OrderedDictionary>();

    // create a new blank template
    // NOTE(review): the generic type arguments in this method were missing from the
    // recovered source and are inferred from usage (string keys, string values) - confirm.
    Dictionary<string, string> template = new Dictionary<string, string>();
    // keyed template - helper for the template creator
    SortedDictionary<string, string> templateKeys = new SortedDictionary<string, string>();
    // every template must have a "name" parameter
    template["name"] = "Name";

    foreach (string strCategory in CategoryList)
    {
        Console.WriteLine("\n------------------------------\n" + strCategory + "\n------------------------------");

        // this is the category name for the Navbox template on the wiki,
        // with spaces removed
        string cat = strCategory.Replace(" ", "");

        // return the Navbox wikitext
        // this is what we have to parse to create subcategories and items
        string wikitext = Wikidata(cat, "category");

        // array of all lines in the wikitext; this is a wiki table so we can just split by newline
        List<string> source = wikitext.Split(new string[] { "\n" }, StringSplitOptions.None).ToList();

        // number of sub-subcategory rows still expected under a "rowspan" header
        int rowcount = 0;
        // name of the parent subcategory that owns those rows
        string prev = "";

        OrderedDictionary categorySauce = new OrderedDictionary();
        // use Category, not cat - this is the display name
        categorySauce["category"] = strCategory;
        List<OrderedDictionary> subcategorySauce = new List<OrderedDictionary>();
        // same list instance is filled below, so it can be attached up front
        categorySauce["subcategories"] = subcategorySauce;
        // index of the subcategory that item rows are currently attached to
        int currentSubI = -1;

        foreach (string line in source)
        {
            // if the line starts with a blacklisted string (item or subcategory), skip it
            if (Globals.Avoid.Any(b => line.StartsWith(b)))
            {
                continue;
            }

            if (line.StartsWith("!"))
            {
                // this is a header, which means it's a subcategory
                string subcat = "";

                if (line.Contains("rowspan"))
                {
                    // this subcategory has its own children:
                    // remember how many rows follow and the name to prepend to them
                    // NOTE(review): the whitespace after "\!" in this pattern was lost to
                    // line wrapping in the recovered source; a single space is assumed.
                    Match mt = Regex.Match(line, @"(?!\! rowspan=""{0,})[0-9]+(?=""{0,}.+)");
                    int parsedRows;
                    // an unparsable rowspan used to throw FormatException; treat it as "no children"
                    rowcount = int.TryParse(mt.Value, out parsedRows) ? parsedRows : 0;

                    mt = Regex.Match(line, @"(?<=\[\[).+(?=\]\])|(?<=\|)[A-Z][A-Za-z ]+");
                    prev = mt.Value.Trim();
                    continue;
                }

                if (rowcount > 0 && prev != "")
                {
                    // sub-subcategory
                    // example: Fishing - Bait
                    // this value is "Bait"
                    Match mt = Regex.Match(line, @"(?<=\[\[)(.+\||)(.+)(?=\]\])");
                    if (mt.Success)
                    {
                        subcat = prev + " - " + mt.Groups[mt.Groups.Count - 1].Value;
                    }
                    else
                    {
                        mt = Regex.Match(line, @"(?!\|\s*)[A-Za-z ]+");
                        subcat = prev + " - " + mt.Value;
                    }

                    rowcount--;
                    if (rowcount <= 0)
                    {
                        rowcount = 0;
                        prev = "";
                    }
                }
                else if (line.Contains("[[") && line.Contains("|"))
                {
                    // ![[Wiki Link|Display Name]]
                    Match mt = Regex.Match(line, @"(?!.+\|\s*)[A-Za-z ]+");
                    subcat = mt.Groups[mt.Groups.Count - 1].Value.Trim();
                }
                else if (line.Contains("[["))
                {
                    // ![[Wiki Link]]
                    Match mt = Regex.Match(line, @"(?<=\[\[).+(?=\]\])");
                    subcat = mt.Value.Trim();
                }
                else if (line.Contains(cat) && !line.Contains("colspan"))
                {
                    // subcategory equals category
                    // we know this isn't the navbox header row since there's no colspan
                    Match mt = Regex.Match(line, @"(?!\!.+\|\s*)[A-Za-z ]+");
                    subcat = mt.Value.Trim();
                }
                else
                {
                    Match mt = Regex.Match(line, @"(?!.+\|\s*)[A-Za-z ]+");
                    subcat = mt.Value;
                }

                if (subcat != "")
                {
                    // create a new subcategory object and populate it with its name
                    OrderedDictionary sc = new OrderedDictionary();
                    sc.Add("subcategory", subcat.Trim());
                    subcategorySauce.Add(sc);
                    currentSubI++;
                }
            }
            else if (line.StartsWith("|"))
            {
                // if the property value is ever any of these, overwrite it with a blank string
                string[] itemPropsBlacklist = { "", "|", "N/A", "\n" };

                // every [[wikilink]] on the row is one item
                List<OrderedDictionary> Items = new List<OrderedDictionary>();
                Match mt = Regex.Match(line, @"(?<=\[\[).+?(?=\]\])");
                while (mt.Success)
                {
                    OrderedDictionary itemProps = new OrderedDictionary();
                    string val = mt.Value;

                    if (!Globals.Avoid.Any(val.Contains))
                    {
                        // [[Name|Display Name]] or [[Name]]
                        string[] props = val.Split(new string[] { "|" }, StringSplitOptions.None);

                        if (val.Contains("Party Hat"))
                        {
                            // only keep party hats whose display name is more specific than
                            // plain "Party Hat"; an unpiped link displays its own name
                            // (indexing props[1] directly threw for unpiped links)
                            string shown = props.Length > 1 ? props[1] : props[0];
                            if (shown == "Party Hat")
                            {
                                mt = mt.NextMatch();
                                continue;
                            }
                            itemProps["name"] = props[0];
                        }
                        else if (props.Length > 1)
                        {
                            itemProps["name"] = props[0];
                            itemProps["display_name"] = props[1];
                        }
                        else
                        {
                            itemProps["name"] = val;
                        }

                        // get individual item infoboxes here
                        string strItem = itemProps["name"].ToString();
                        // NOTE(review): the returned infobox is discarded here, so itemProps only
                        // carries the name/display name - confirm whether it should be merged in.
                        ItemBox(strItem, strCategory);
                    }

                    // if itemProps is populated, add it to the Items list
                    if (itemProps.Count > 0)
                    {
                        Items.Add(itemProps);
                    }
                    mt = mt.NextMatch();
                }

                // an item row can only be attached once a header row has been seen
                bool hasSubcategory = currentSubI >= 0 && currentSubI < subcategorySauce.Count;

                // if this is Tools > Basic, parse the tool pages separately
                if (hasSubcategory && cat == "Tools" && subcategorySauce[currentSubI]["subcategory"].ToString() == "Basic")
                {
                    ParseTools(ref itemPropsBlacklist, ref Items);
                }

                // make sure items are alphabetized (entries without a "name" sort first)
                Items.Sort((OrderedDictionary a, OrderedDictionary b) =>
                    string.Compare(Convert.ToString(a["name"]), Convert.ToString(b["name"])));

                if (hasSubcategory)
                {
                    subcategorySauce[currentSubI]["items"] = Items;
                }
                else
                {
                    Console.WriteLine("Skipping item row with no preceding subcategory header: " + line);
                }
            }
        }

        lstCategories.Add(categorySauce);
    }

    // sort the template attributes?
List<KeyValuePair<string, string>> template2 = template.ToList<KeyValuePair<string, string>>();
    template2.Sort((x,y) => x.Key.CompareTo(y.Key));
    OrderedDictionary templateDict = new OrderedDictionary();
    foreach (KeyValuePair<string, string> kvp in template2)
    {
        templateDict[kvp.Key] = kvp.Value;
    }
    string templateJSON = JsonConvert.SerializeObject(templateKeys, Newtonsoft.Json.Formatting.Indented);
    //templateJSON.Dump();
    sauce["template"] = templateDict;
    sauce["content"] = new OrderedDictionary();
    ((OrderedDictionary)sauce["content"])["categories"] = lstCategories;
    //sauce.Dump();
    string jo = JsonConvert.SerializeObject(sauce, Newtonsoft.Json.Formatting.Indented);

    // save a new one if files already exist
    // DirectoryInfo dataDir = new DirectoryInfo(@"F:\Projects\Flexbook\Test Data");
    string pcname = Environment.MachineName.ToLower();
    string drv = "C";
    switch (pcname)
    {
        case "breve":
            drv = "D";
            break;
        case "zelda":
            drv = "F";
            break;
    }
    string dir = drv + @":\Projects\Flexbook\Test Data";
    // dir = @"D:\Source\Flexbook\Test Data";
    DirectoryInfo dataDir = new DirectoryInfo(dir);
    FileInfo[] Files = dataDir.GetFiles("stardewsample-all*.json");
    File.WriteAllText(dir + @"\stardewsample-all-" + Files.Count() + ".json", jo);

    // build the template key file
    string jtk = JsonConvert.SerializeObject(templateKeys, Newtonsoft.Json.Formatting.Indented);
    File.WriteAllText(dir + @"\stardewsample-all-" + Files.Count() + "-template-keys.json", jtk);
}

/// <summary>
/// Builds the property dictionary for one item from the infobox in the first section
/// of its wiki page, then derives description, profession, quality and artisan prices.
/// </summary>
/// <param name="strItem">Wiki page title of the item; also stored as the "name" property.</param>
/// <param name="strCategory">Display name of the category being processed ("Fish" adds the fishing professions).</param>
/// <returns>
/// The item's properties, or null when the item is in Globals.Avoid or equals the first
/// entry of Globals.Categories.
/// </returns>
/// <remarks>
/// TODO (from the original author): this should become a void taking a ref to the existing
/// item dictionary, plus refs to template and templateKeys.
/// </remarks>
OrderedDictionary ItemBox(string strItem, string strCategory)
{
    // skip if the node exists in our blacklist
    if (Globals.Avoid.Contains(strItem) || strItem == Globals.Categories[0])
    {
        return null;
    }

    // infobox properties whose value is a list of links/templates rather than one string
    string[] listProperties =
    {
        "source", "as", "dr", "md", "os", "ingredients", "tingredients", "season", "produce",
        "drops", "location", "occupants", "materials", "animals", "favorites", "family",
        "friends", "buff", "seed"
    };
    char[] trimChars = { ' ', '\n' };

    // get first section of item's wiki page
    // this contains the infobox
    string strWiki = Wikidata(strItem, "item");

    // strip out the content between the starting {{ and the first line break
    string strRes = Regex.Replace(strWiki, "Infobox.*", "");

    // strip out everything past the closing }}, which is always preceded by a newline;
    // the extra pipe in the replacement gives us the right formatting for the split below
    int intStart = strRes.IndexOf("\n}}", StringComparison.Ordinal);
    if (intStart >= 0)
    {
        strRes = strRes.Substring(0, intStart) + "\n|}}";
    }

    // split the string by the leading | on each line
    string[] strSplit = strRes.Split(new[] { "\n|" }, StringSplitOptions.RemoveEmptyEntries);

    OrderedDictionary itm = new OrderedDictionary();
    itm["name"] = strItem;

    foreach (string s in strSplit)
    {
        // "key = value"; lines without a value are ignored
        string[] strElements = s.Split(new[] { "=" }, 2, StringSplitOptions.RemoveEmptyEntries);
        if (strElements.Length < 2)
        {
            continue;
        }

        string propertyName = strElements[0].Trim(trimChars);
        string rawValue = strElements[1].Trim(trimChars);
        object propertyValue;

        if (listProperties.Contains(propertyName))
        {
            // abbreviated infobox keys get readable names
            switch (propertyName)
            {
                case "source": propertyName = "sources"; break;
                case "as": propertyName = "wigglies"; break;
                case "dr": propertyName = "reward"; break;
                case "md": propertyName = "drops"; break;
                case "os": propertyName = "othersrc"; break;
            }

            List<string> sourceList = new List<string>();
            if (s.Contains("[[") || s.Contains("{{"))
            {
                parseSource(s, "[[", "]]", ref sourceList);
                parseSource(s, "{{", "}}", ref sourceList);
            }
            else
            {
                // plain text value
                sourceList.Add(rawValue);
            }
            sourceList.Sort();
            propertyValue = sourceList.ToArray();
        }
        else if (propertyName == "profession")
        {
            // comma-separated list used by the infobox to calculate prices;
            // entries are trimmed so they match the keys of Globals.Professions
            propertyValue = rawValue.Split(',')
                                    .Select(p => p.Trim())
                                    .Where(p => p.Length > 0)
                                    .ToArray();
        }
        else
        {
            propertyValue = parseString(rawValue);
        }

        // fix price key name
        if (propertyName == "price")
        {
            propertyName = "sellprice";
        }

        // strip double apostrophes (wiki italics) from strings
        if (propertyValue is string text && text.Contains("''"))
        {
            propertyValue = text.Replace("''", "");
        }

        itm[propertyName] = propertyValue;
    }

    // build description template, e.g. {{Description|Prismatic Shard}}, and expand it
    itm["description"] = Wikidata("{{Description|" + strItem + "}}", "wikitext");

    // add profession property where necessary
    if (strItem == "Dinosaur Egg")
    {
        itm["profession"] = new string[] { "Rancher", "Artisan" };
    }
    // if it's a fish, add the right professions
    if (strCategory == "Fish")
    {
        itm["profession"] = new string[] { "Fisher", "Angler" };
    }

    // calculate quality prices
    if (itm["quality"] as string == "true" || itm["iridium"] as string == "true")
    {
        GetPrices(ref itm);
    }

    // calculate price based on profession
    string[] professions = itm["profession"] as string[];
    if (professions != null && itm["sellprice"] != null)
    {
        foreach (string profession in professions)
        {
            GetPrices(ref itm, profession.ToLower());
        }
    }

    // crop type overrides: later assignments win, as before
    if (new[] { "Hops", "Wheat", "Tea Leaves" }.Contains(strItem))
    {
        itm["type"] = "vegetable-p";
    }
    if (strItem == "Corn")
    {
        itm["type"] = "vegetable";
    }
    if (Globals.Flowers.Contains(strItem))
    {
        itm["type"] = "flower";
    }

    // artisan profession bonus: 1.4x, truncated
    string artPrice(object p) => Math.Truncate(Convert.ToInt32(p) * 1.4).ToString();

    if (itm["type"] != null)
    {
        // get the base sell price
        int sprice = Convert.ToInt32(itm["sellprice"]);
        string type = itm["type"].ToString();

        if (type == "fruit")
        {
            // jelly = (base * 2) + 50
            itm["price_jelly"] = ((sprice * 2) + 50).ToString();
            itm["price_jelly_artisan"] = artPrice(itm["price_jelly"]);
            // wine = base * 3, available in all qualities
            GetPrices(ref itm, "wine_");
        }
        else if (type.StartsWith("vegetable"))
        {
            // pickles = (base * 2) + 50
            itm["price_pickles"] = ((sprice * 2) + 50).ToString();
            itm["price_pickles_artisan"] = artPrice(itm["price_pickles"]);

            if (type == "vegetable")
            {
                // juice = base * 2.25 (this previously reused the pickles formula)
                itm["price_juice"] = Math.Truncate(sprice * 2.25m).ToString();
                itm["price_juice_artisan"] = artPrice(itm["price_juice"]);
            }
        }
        else if (type == "flower")
        {
            // honey = 100 + (base * 2)
            itm["price_honey"] = (100 + (sprice * 2)).ToString();
            itm["price_honey_artisan"] = artPrice(itm["price_honey"]);
        }
    }

    // if there's an artisan good associated with this item,
    // get that good's data and copy its prices onto the item
    if (Globals.ArtisanGoods.Contains(strItem))
    {
        string ArtGoodName = Globals.ArtisanGoods[strItem].ToString();
        itm["artisan_good"] = ArtGoodName;

        // the recursive call returns null for blacklisted names
        OrderedDictionary ArtGood = ItemBox(ArtGoodName, strCategory);
        if (ArtGood != null)
        {
            List<string> priceKeys = ArtGood.Keys.Cast<object>()
                                            .Select(k => k.ToString())
                                            .Where(k => k.Contains("price_"))
                                            .ToList();
            foreach (string k in priceKeys)
            {
                // keys are stored under a generic "product" prefix rather than the good's name
                string theKey = k.Replace("price_", "price_product_");
                itm[theKey] = Convert.ToInt32(ArtGood[k]).ToString();
            }
        }
    }

    return itm;
}

/// <summary>
/// Appends quality prices to item OrderedDictionary: GetPrices(ref item, string profession)
/// set profession to "wine_" for wine pricing
/// </summary>
void GetPrices(ref OrderedDictionary itm, string pro = null)
{
    // get the base price from the item
    int price = Convert.ToInt32(itm["sellprice"]);
    // set the base multiplier
    decimal multi = 1;
    // set the string prefix for key names
    string pre = "price_";
    // if a profession is defined, use its multiplier and add a prefix
    if (pro != null)
    {
        if (pro == "wine_")
        {
            multi *= 3;
            pre += pro;
        }
        else
        {
            multi *= Convert.ToDecimal(Globals.Professions[pro]);
            pre += pro.ToLower() + "_";
        }
    }
    // calculate prices and add key+value pairs to item
    int i = 0;
    foreach (DictionaryEntry qual in Globals.Quality)
    {
        string key = pre + i + "_" + qual.Key;
        decimal newprice = price * Convert.ToDecimal(qual.Value) * multi;
        itm[key] = Math.Truncate(newprice).ToString();
        i++;
    }
    // calculate artisan prices for wine
    if (pro == "wine_")
    {
        i = 0;
        foreach (DictionaryEntry qual in Globals.Quality)
        {
            string key =
pre + "artisan_" + i + "_" + qual.Key;
            decimal bprice = Convert.ToDecimal(itm["price_wine_" + i + "_" + qual.Key]);
            decimal newprice = bprice * Convert.ToDecimal(1.4);
            itm[key] = Math.Truncate(newprice).ToString();
            i++;
        }
    }
    if ((string)itm["iridium"] != "true")
    {
        itm.Remove(pre + "3_iridium");
    }
}

/// <summary>
/// Appends health and energy buffs to item OrderedDictionary: GetBuffs(ref item)
/// </summary>
/// <remarks>
/// Reads itm["edibility"]. At -300 or below the item is marked inedible and nothing else
/// is added. Otherwise one "energy_*" entry is written per Globals.BuffQuality tier
/// (a single "energy_base" of 0 when edibility is 0), plus a "health_*" entry per tier
/// when edibility is positive.
/// </remarks>
void GetBuffs(ref OrderedDictionary itm)
{
    // get base edibility score
    int basic = Convert.ToInt32(itm["edibility"]);

    if (basic <= -300)
    {
        itm["edible"] = "false";
        return;
    }

    itm["edible"] = "true";

    if (basic == 0)
    {
        // edible, but restores nothing
        itm["energy_base"] = 0;
        return;
    }

    foreach (DictionaryEntry m in Globals.BuffQuality)
    {
        string key = m.Key.ToString();
        int quality = Convert.ToInt32(m.Value);

        // energy = ceil(edibility * 2.5) + edibility * quality tier
        int energy = Convert.ToInt16(Math.Ceiling(basic * 2.5) + basic * quality);
        itm["energy_" + m.Value + "_" + key] = energy;

        // only positive edibility restores health: 45% of the energy, truncated
        if (basic > 0)
        {
            int health = Convert.ToInt16(Math.Truncate(energy * 0.45));
            itm["health_" + m.Value + "_" + key] = health;
        }
    }
}

/// <summary>
/// Returns a string of wikitext by search string and query type
/// </summary>
/// <param name="strInput">
/// Page title ("item"), image file name ("image"), Navbox template suffix ("category"),
/// or raw wikitext to expand ("wikitext"). It is URL-encoded before being sent.
/// </param>
/// <param name="strType">One of "item", "image", "category", "wikitext".</param>
/// <returns>
/// The revision content, image URL or expanded wikitext. An empty string is returned for
/// an unknown <paramref name="strType"/> (no request is made) and when the response does
/// not contain the expected node, e.g. for a page that does not exist.
/// </returns>
String Wikidata(string strInput, string strType)
{
    const string api = "https://stardewvalleywiki.com/mediawiki/api.php";

    // titles and wikitext can contain '&', '#', '+', '|' or braces, all of which would
    // otherwise be read as part of the query string
    string escaped = Uri.EscapeDataString(strInput ?? "");

    string strURL;
    switch (strType)
    {
        case "item":
            // "[query][pages][$pageid$][revisions][0][*]"
            strURL = api + "?action=query&prop=revisions&rvprop=content&format=json&rvsection=0&titles=" + escaped;
            break;
        case "image":
            // "[query][pages][$pageid$][imageinfo][0][url]"
            // this is the actual image file from the wiki
            strURL = api + "?action=query&prop=imageinfo&iiprop=url&continue=&format=json&titles=File:" + escaped;
            break;
        case "category":
            // "[query][pages][$pageid$][revisions][0][*]"
            strURL = api + "?action=query&prop=revisions&rvprop=content&format=json&rvsection=0&titles=Template:Navbox" + escaped;
            break;
        case "wikitext":
            // "[expandtemplates][wikitext]"
            strURL = api + "?action=expandtemplates&prop=wikitext&format=json&text=" + escaped;
            break;
        default:
            return "";
    }

    string raw;
    using (WebClient client = new WebClient())
    {
        raw = client.DownloadString(strURL);
    }
    JObject obj = JObject.Parse(raw);

    if (strType == "wikitext")
    {
        return obj["expandtemplates"]?["wikitext"]?.ToString() ?? "";
    }

    // the page node is keyed by a page id we don't know in advance, so take the first one
    JObject pages = obj["query"]?["pages"] as JObject;
    JToken page = pages?.Properties().FirstOrDefault()?.Value;
    if (page == null)
    {
        return "";
    }

    JToken content = strType == "image"
        ? page["imageinfo"]?.First?["url"]
        : page["revisions"]?.First?["*"];
    return content?.ToString() ?? "";
}

/// <summary>
/// Special parser for tool information
/// </summary>
void ParseTools(ref string[] blacklist, ref List<OrderedDictionary> items)
{
    // the master list of
tools List Tools = new List(); // tool groups string[] strTools = { "Axes", "Hoes", "Pickaxes", "Trash Cans", "Watering Cans" }; foreach (string strTool in strTools) { // the URL to hit for information string strURL = "https://stardewvalleywiki.com/mediawiki/api.php?action=query&prop=revisions&rvprop=content&format=json&rvsection=1&titles=" + strTool; // the raw JSON output from the API response var toolRaw = new WebClient().DownloadString(strURL); // parse the output into a JSON object model JObject toolObj = JObject.Parse(toolRaw.ToString()); // get the page element JObject toolPage = (JObject)toolObj["query"]["pages"]; // we have to find the page ID, which is super annoying string toolPageId = ""; foreach (JProperty p in toolPage.Properties()) { if (toolPageId == "") { toolPageId = p.Name; } else { break; } } // ok now we have the page ID // get the page content string strInput = toolPage[toolPageId]["revisions"][0]["*"].ToString(); // if there's content, process it if (strInput.Length > 0) { if (strInput.Contains("wikitable")) { // this is wikitable markup // convert into objects // find the column headers // these are the object properties Match headerMatches = Regex.Match(strInput, @"(?<=\n\!)(.+)"); List lstHeaders = new List(); // we need this information to know where the table content starts int hIdx = 0; int hLen = 0; // create a list of keys from wikitable headers while (headerMatches.Success) { string val = headerMatches.Value.Trim().ToLower(); hLen = headerMatches.Length; hIdx = headerMatches.Index; lstHeaders.Add(val == "improvements" ? 
"notes" : val); headerMatches = headerMatches.NextMatch(); } // extract the table rows // +4 for the \n|-\n at the beginning of this section int startI = hIdx + hLen + 4; int endI = strInput.Length - startI; // this is the wikitext for the table rows string wikiRows = strInput.Substring(startI, endI).Trim(); // split the string by the wikitext markup for a new row: |- List lstRows = wikiRows.Split(new string[] { "\n|-\n" }, StringSplitOptions.None).ToList(); foreach (string row in lstRows) { // split this into a list by regex // why? because some random bitch on the stardew valley wiki refuses to allow a single-line syntax on the affected pages // literally. // List rowcols = row.Split(new string[] { "\n|"}, StringSplitOptions.None).ToList(); List rowcols = Regex.Split(row, @"\n\|").ToList(); // empty array of column values for this row List newcols = new List(); OrderedDictionary tool = new OrderedDictionary(); int toolIndex = 0; foreach (string rowcol in rowcols) { string outval = ""; string newval = rowcol; if (rowcol.StartsWith("|")) { newval = rowcol.Substring(1); } else { newval = rowcol; } if (!blacklist.Contains(rowcol)) { // we have a column entry to parse if (newval.StartsWith("}")) { // this is the end of the table, so skip it continue; } else if (newval.Contains("[")) { // this is a link of some kind // extract all links matching regex // if link has a pipe, split it // if link is a file, use the image name as the "image" dictionary entry and discard anything after the first match // if the link isn't a file, use the displayname (second match) Match linkMts = Regex.Match(newval, @"(?<=\[\[).+(?=\]\])"); string tempval = ""; while (linkMts.Success) { // check for File: first // then check for pipe without File: string linktext = linkMts.Value; if (linktext.Contains("File:")) { Match fileMts = Regex.Match(linktext, @"(?<=File:)[\w .]+"); tempval = newval.Replace(linktext, fileMts.Value); } else if (linkMts.Value.Contains("|")) { string[] linkProps = 
linkMts.Value.Split(new string[] { "|" }, StringSplitOptions.None);
                                        if (linkProps[0].Contains("File"))
                                        {
                                            Match fileMts = Regex.Match(linkProps[0], @"(?<=File:).+");
                                            // this is the image name
                                            tempval = newval.Replace(linkProps[0], fileMts.Value);
                                        }
                                        else
                                        {
                                            tempval = newval.Replace(linkMts.Value, linkProps[1]);
                                        }
                                    }
                                    linkMts = linkMts.NextMatch();
                                }
                                outval = tempval.Replace("[", "").Replace("]", "");
                            }
                            else if (newval.Contains("{"))
                            {
                                // there's data to be extracted here
                                // see if it's a template
                                Match tplMts = Regex.Match(newval, @"(?<=\{\{).+(?=\}\})");
                                while (tplMts.Success)
                                {
                                    // split the value by pipe
                                    string[] splitVals = tplMts.Value.Split(new string[] { "|" }, StringSplitOptions.None);
                                    if (splitVals.Count() == 3)
                                    {
                                        // if array has three members, output like: Copper Bar (5)
                                        outval = splitVals[1] + " (" + splitVals[2] + ")";
                                    }
                                    else if (splitVals.Length == 2)
                                    {
                                        // if the array has two members, output the second
                                        outval = splitVals[1];
                                    }
                                    else
                                    {
                                        //this is wrong
                                    }
                                    tplMts = tplMts.NextMatch();
                                }
                            }
                            else
                            {
                                // use the value as-is
                                outval = newval;
                            }
                        }
                        else
                        {
                            // this is an empty column, use a space
                            outval = " ";
                        }
                        // clean up extraneous wikitext markup
                        outval = outval.Replace("'''", "").Replace("''", "").Replace("\n\n", "\n");
                        tool.Add(lstHeaders[toolIndex], outval);
                        toolIndex++;
                        newcols.Add(outval);
                    }
                    Console.WriteLine(tool["name"]);
                    tool.Dump();
                    items.Add(tool);
                }
            }
        }
    }
}

/// <summary>
/// Single-line string parser: decodes HTML entities, then replaces every [[wikilink]]
/// and {{template}} in the value with its display text.
/// </summary>
/// <param name="s">Raw infobox property value.</param>
/// <returns>The value with wiki markup expanded in place; null/empty input is returned unchanged.</returns>
string parseString(string s)
{
    if (string.IsNullOrEmpty(s))
    {
        return s;
    }

    // replace all HTML entities with characters
    string result = HttpUtility.HtmlDecode(s);

    if (result.Contains("[["))
    {
        // [[File:Fishing Skill Icon.png|24px|link=]] [[Fishing]] Level 2  ->  Fishing Level 2
        // (has to run before the per-link pass, which only ever sees one link at a time)
        result = Regex.Replace(
            result,
            @"\[\[File:[^\]]*\]\] \[\[([^\]|]+)\]\]( Level [0-9]+)",
            "$1$2",
            RegexOptions.IgnoreCase);

        // .+? means lazy (ungreedy) matching, so each link is expanded on its own
        result = Regex.Replace(result, @"\[\[(.+?)\]\]", m => expandWikiLink(m.Groups[1].Value));
    }

    if (result.Contains("{{"))
    {
        // each template is replaced where it stands, so surrounding text and other
        // templates survive; Regex.Replace also guarantees progress on every match
        // (the hand-written loop never advanced past a template with a single segment)
        result = Regex.Replace(result, @"\{\{(.+?)\}\}", m => expandTemplate(m.Value));
    }

    // clean <p> tags from result
    // NOTE(review): the tag literals were lost from the recovered source (they had been
    // rendered as blank lines); "<p>" / "</p>" is the reconstruction - confirm.
    result = result.Replace("<p>", @"\n");
    result = result.Replace("</p>", "");
    return result;
}

// Display text for the inside of one [[...]] link: the file name for File: links,
// the text after the pipe for piped links, otherwise the link target itself.
string expandWikiLink(string inner)
{
    string[] parts = inner.Split('|');

    // [[File:Shirt001.png|center]] -> Shirt001.png
    // [[File:Axe.png]]             -> Axe.png
    if (inner.StartsWith("File:", StringComparison.OrdinalIgnoreCase))
    {
        return parts[0].Substring("File:".Length);
    }

    // [[Random Events#Meteorite|meteorite]] -> meteorite
    // [[The Mines]]                         -> The Mines
    return parts.Length > 1 ? parts[1] : inner;
}

// Display text for one {{...}} template, including the braces in wikitext.
// Falls back to the text between the braces when the template has no parameters.
string expandTemplate(string wikitext)
{
    // this is the string minus the start and end brackets
    string input = wikitext.Substring(2, wikitext.Length - 4);

    // all {{wikitext templates}} use pipe as the delimiter
    string[] segments = input.Split(new string[] { "|" }, StringSplitOptions.RemoveEmptyEntries);
    if (segments.Length < 2)
    {
        return input;
    }

    // for case-insensitive string.Contains()
    string littletext = wikitext.ToLower();

    switch (segments[0].ToLower())
    {
        case "name":
            if (segments.Length == 2)
            {
                // return second segment as-is
                return segments[1];
            }
            // everything past the third segment (e.g. class=inline) is ignored
            if (littletext.Contains("level"))
            {
                // {{name|Farming|Level 3|image=Farming Skill Icon.png}} -> Farming Level 3
                return segments[1] + " " + segments[2];
            }
            if (littletext.Contains("+"))
            {
                // {{name|Defense|+3}} -> +3 Defense
                return segments[2] + " " + segments[1];
            }
            // {{name|Omni Geode|50|...}} -> Omni Geode (50)
            return segments[1] + " (" + segments[2] + ")";

        case "npc":
            // {{NPC|Jodi|Mother}} -> Jodi (Mother)
            return segments.Length > 2
                ? segments[1] + " (" + segments[2] + ")"
                : segments[1];

        case "price":
            // {{Price|30|Currency}}
            // JOPK, Qi, Star Token (if third parameter is empty, this is regular gold)
            string price = segments[1];
            if (segments.Length > 2)
            {
                switch (segments[2])
                {
                    case "JOPK": price += " tokens"; break;
                    case "Qi": price += " Qi coins"; break;
                    case "Token": price += " star tokens"; break;
                }
            }
            else
            {
                price += "g";
            }
            return price;

        case "description":
            // {{Description|Wild Horseradish}}
            // {{Description|Recipe|Lucky Lunch}}
            // expanded by the wiki itself (action=expandtemplates)
            return Wikidata(wikitext, "wikitext");

        default:
            // unknown template: keep its text but make it easy to spot
            return input + " ***BAD INPUT***";
    }
}

/// <summary>
/// String parser for delimited lists: extracts every strStart...strEnd span from
/// strInput and appends its display text to lstSauce.
/// </summary>
/// <param name="strInput">Infobox line, e.g. "ingredients = {{name|Copper Bar|5}}".</param>
/// <param name="strStart">Opening marker, "[[" or "{{".</param>
/// <param name="strEnd">Closing marker, "]]" or "}}".</param>
/// <param name="lstSauce">List the extracted entries are appended to.</param>
static void parseSource(string strInput, string strStart, string strEnd, ref List<string> lstSauce)
{
    if (string.IsNullOrEmpty(strInput))
    {
        return;
    }

    // the Abandoned House icon stands for a single fixed source
    if (strInput.Contains("Abandoned House Icon.png") && !lstSauce.Contains("Hat Mouse"))
    {
        lstSauce.Add("Hat Mouse");
        return;
    }

    // add blacklisted entries here
    List<string> avoid = new List<string>() { };

    // clean string of extraneous characters that break parsing; bold (''') has to go
    // before italics (''), otherwise every bold marker leaves a stray apostrophe
    strInput = strInput.Replace("{{!}}", "").Replace("'''", "").Replace("''", "");

    while (true)
    {
        // trim any whitespace
        strInput = strInput.Trim();

        // find the first start marker and the first end marker AFTER it
        int start = strInput.IndexOf(strStart, StringComparison.Ordinal);
        if (start < 0)
        {
            break;
        }
        int contentStart = start + strStart.Length;
        int close = strInput.IndexOf(strEnd, contentStart, StringComparison.Ordinal);
        if (close < 0)
        {
            // unterminated markup: nothing more can be extracted
            break;
        }

        // the text between the markers
        string sub = strInput.Substring(contentStart, close - contentStart);
        // remove the extracted span, markers included
        strInput = strInput.Remove(start, close + strEnd.Length - start);

        // don't include items that match the blacklist
        if (avoid.Any(sub.Contains))
        {
            continue;
        }

        //"name|Fishing|Level 2|image=Fishing Skill Icon.png"
        //"name|Bug Meat|1"
        if (sub.Contains("|"))
        {
            string[] subSplit = sub.Split(new[] { "|" }, StringSplitOptions.RemoveEmptyEntries);
            if (subSplit.Length == 1)
            {
                // "Name|" - only one usable segment
                sub = subSplit[0];
            }
            else if (subSplit.Length > 1)
            {
                if (sub.Contains("Bundle"))
                {
                    sub = subSplit[1] + " Bundle";
                }
                else if (strInput.Contains("ingredients"))
                {
                    // Copper Bar (5)
                    sub = subSplit[1];
                    sub += subSplit.Length == 3 ? " (" + subSplit[2] + ")" : "";
                }
                else if (strInput.Contains("recipe"))
                {
                    // recipes are kept raw and only logged
                    Console.Write(sub + " | ");
                }
                else
                {
                    sub = subSplit[1];
                }
            }
        }

        lstSauce.Add(sub);
    }
}

// function for parsing single line values
// this needs reworking: running a whole batch of regexes at once is brittle
string parseStringOld(string propertyValue)
{
    if (propertyValue.Contains("Traveling Cart"))
    {
        //Util.Break();
        // there's something funny
    }
    // {{name=Skill|Level #|imge=...}}
    Match m_new = Regex.Match(propertyValue, @"(?<={{name\|)[\w\s']+\|Level [0-9]+(?=\|.+)");
    // [[File:...]] [[Skill]] Level #
    Match m_old = Regex.Match(propertyValue, @"(?!\[\[File.+\]\] \[\[)\w+\]\] Level [0-9]+", RegexOptions.IgnoreCase);
    // {{NPC|...|...[[..]]}}
    // {{NPC|...|...}}
    Match m_npc = Regex.Match(propertyValue, @"(?<=\{\{NPC\|)[A-Za-z0-9 \-\+\|]+(?=\[\[|\}\})");
    // {{description|...}}
    Match m_desc = Regex.Match(propertyValue, @"(?<=\{\{description\|)[A-Za-z0-9 \-\+\|]+(?=|\}\})", RegexOptions.IgnoreCase);
    // clean up wikilink syntax
    Match m_link = Regex.Match(propertyValue, @"(?<=\[\[).+?(?=\]\])"); // .+?
means lazy (ungreedy) matching // remove wikilink brackets Match m_bckt = Regex.Match(propertyValue, @"(?<=\[\[)[\w\s']+(?=\]\])"); // {{name|Defense|+3}} Match m_stats = Regex.Match(propertyValue, @"(?<=\{\{name\|)[\w\s']+\|\+[0-9]+(?=\}\})"); // {{price|15000}} Match m_price = Regex.Match(propertyValue, @"(?<=\{\{price\|)[\w\s']+(?=\}\})"); // {{name|Omni Geode|50}} Match m_quantity = Regex.Match(propertyValue, @"(?<=\{\{name\|)[\w\s']+\|\+[0-9]+(?=\}\})"); // {{name|...}} Match m_curly = Regex.Match(propertyValue, @"(?<=\{\{name\|)[\w\s']+(?=\||\})"); string result = propertyValue; if (m_new.Success) { result = m_new.Value.Replace("|", " "); } else if (m_old.Success) { result = m_old.Value.Replace("]]", ""); } else if (m_npc.Success) { result = m_npc.Value.Replace("|", " - "); if (result.Contains("+")) { result += "hearts"; } } else if (m_desc.Success) { result = m_desc.Value; } else if (m_link.Success) { string tempval = propertyValue; while (m_link.Success) { // check for File: first // then check for pipe without File: string linktext = m_link.Value; if (linktext.Contains("File:")) { Match fileMts = Regex.Match(linktext, @"(?<=File:)[\w .]+"); tempval = tempval.Replace(linktext, fileMts.Value); } else if (linktext.Contains("|")) { string[] linkProps = m_link.Value.Split(new string[] { "|" }, StringSplitOptions.None); tempval = tempval.Replace(m_link.Value, linkProps[1]); } m_link = m_link.NextMatch(); } result = tempval.Replace("[", "").Replace("]", ""); } else if (m_bckt.Success) { result = (propertyValue.Replace("[", "")).Replace("]", ""); } else if (m_stats.Success) { List stats = new List(); // there might be more than one while (m_stats.Success) { // split value by pipe // display as val[1] + " " + val[0] string[] props = m_stats.Value.Split(new string[] { "|" }, StringSplitOptions.None); stats.Add((props[1] + " " + props[0]).Replace("{", "").Replace("}", "")); m_stats = m_stats.NextMatch(); } // output as multiline string result = String.Join("\n", 
stats); } else if (m_price.Success) { string price = "{{price|" + m_price.Value + "}}"; result = propertyValue.Replace(price, m_price.Value); } else if (m_quantity.Success) { } else if (m_curly.Success) { result = m_curly.Value; } return result; } /// /// global constants for different functions /// public class Globals { /// /// list of blacklisted items by name - these will be skipped /// public static List Avoid = new List { // wikitext characters "<", "{", "|-", "|}", // ignored items "Brown Egg", "Animals#", "Stone Owl", "Panda Hat", // these tools are parsed separately "Axes", "Hoes", "Pickaxes", "Trash Cans", "Watering Cans" // these are broken and need to be fixed "Coop", "Barn", "Haunted Skull", "Jellies and Pickles", "Shed", "Slime" }; /// /// list of categories for navbox wikitext parsing /// public static string[] Categories = { "Animals", // 0 "Artifacts", // 1 "Artisan Goods", // 2 "Buildings", // 3 "Clothing", // 4 "Crop", // 5 "Decor", // 6 "Equipment", // 7 "Fish", // 8 "Foraging", // 9 "Furniture", // 10 "Ingredients", // 11 "Lighting", // 12 "Minerals", // 13 "Monsters", // 14 "Recipes", // 15 "Resources", // 16 "Seeds", // 17 "Tree", // 18 "Tools", // 19 "Villagers", // 20 "Warp Totems", // 21 "Weapons" // 22 }; /// /// Price multiplier for each Quality rating /// public static OrderedDictionary Quality = new OrderedDictionary { { "base", 1 }, { "silver", 1.25 }, { "gold", 1.5 }, { "iridium", 2 } }; public static OrderedDictionary BuffQuality = new OrderedDictionary { { "base", 0 }, { "silver", 1 }, { "gold", 2 }, { "iridium", 4 } }; /// /// Price multiplier for each Profession /// public static OrderedDictionary Professions = new OrderedDictionary { { "artisan", 1.4 }, { "rancher", 1.2 }, { "gemologist", 1.3 }, { "tiller", 1.1 }, { "blacksmith", 1.5 }, { "forester", 1.25 }, { "fisher", 1.25 }, { "angler", 1.5 } }; /// /// Produce-to-ArtisanGood relationship /// public static OrderedDictionary ArtisanGoods = new OrderedDictionary { { "Hops", 
"Pale Ale" }, { "Wheat", "Beer" }, { "Honey", "Mead" }, { "Milk", "Cheese" }, { "Large Milk", "Cheese" }, { "Goat Milk", "Goat Cheese" }, { "Large Goat Milk", "Goat Cheese" }, { "Coffee Bean", "Coffee" }, { "Tea Leaves", "Green Tea" }, { "Wool", "Cloth" }, { "Egg", "Mayonnaise" }, { "Large Egg", "Mayonnaise" }, { "Void Egg", "Void Mayonnaise" }, { "Dinosaur Egg", "Dinosaur Mayonnaise" }, { "Truffle", "Truffle Oil" }, { "Corn", "Oil" }, { "Sunflower", "Oil" }, { "Sunflower Seeds", "Oil" }, { "Sturgeon Roe", "Caviar" }, { "Roe", "Aged Roe" }, }; /// /// List of flowers for honey /// public static List Flowers = new List { { "Blue Jazz" }, { "Fairy Rose" }, { "Poppy" }, { "Summer Spangle" }, { "Sunflower" }, { "Tulip" } }; }