A console tool for scraping the official Stardew Valley Wiki and serializing it into a JSON object for Enchiridion Reader.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1637 lines
43 KiB

<Query Kind="Program">
<Reference>&lt;RuntimeDirectory&gt;\System.Web.dll</Reference>
<NuGetReference>Newtonsoft.Json</NuGetReference>
<Namespace>Newtonsoft.Json.Linq</Namespace>
<Namespace>System.Collections.Specialized</Namespace>
<Namespace>System.Net</Namespace>
<Namespace>Newtonsoft.Json</Namespace>
<Namespace>System.Web</Namespace>
</Query>
/// <summary>
/// Script entry point. For every selected navbox category it downloads the navbox wikitext,
/// turns header rows ("!") into subcategories and link rows ("|") into item lists, and then
/// writes the combined metadata/template/content object plus the template-key helper as
/// indented JSON files into the test-data folder.
/// </summary>
void Main()
{
    Util.AutoScrollResults = true;
    Console.Write(@"
This script uses a standard mediawiki navbox template to build JSON data for Flexbook.
Override string[] Categories to limit results for testing.
Set the metadata variables for the template below.
Complete:
* Tool item objects for basic tools
* Item name parsing for ignored names and wikilink syntax
======================================================================================
");

    // book-level metadata, emitted under the "metadata" key
    OrderedDictionary meta = new OrderedDictionary();
    meta["author"] = "ClairelyClaire";
    meta["version"] = "0.1";
    meta["published"] = "1/4/2020";

    // window of Globals.Categories to process; the defaults cover every category
    int startCat = 0;
    int subCount = Globals.Categories.Count() - startCat;
    // override startCat to start at category - count to category then subtract 1
    // override subCount to limit length, minimum size 1
    startCat = 19;
    subCount = 1;
    string[] CategoryList = Globals.Categories.ToList<string>().GetRange(startCat, subCount).ToArray();

    // categories that need completely special processing:
    // Festivals, Locations, Seasons, SkillsStats
    OrderedDictionary sauce = new OrderedDictionary();
    sauce["metadata"] = meta;
    List<OrderedDictionary> lstCategories = new List<OrderedDictionary>();

    // blank item template; every template must have a "name" parameter
    Dictionary<string, object> template = new Dictionary<string, object>();
    template["name"] = "Name";
    // keyed template - helper for the template creator
    SortedDictionary<string, object> templateKeys = new SortedDictionary<string, object>();

    foreach (string strCategory in CategoryList)
    {
        Console.WriteLine("\n------------------------------\n" + strCategory + "\n------------------------------");

        // the Navbox template name on the wiki is the category name without spaces
        string cat = strCategory.Replace(" ", "");

        // the navbox is a wiki table, so it can simply be split by newline
        string wikitext = Wikidata(cat, "category");
        string[] source = wikitext.Split(new string[] { "\n" }, StringSplitOptions.None);

        // number of sub-subcategory headers still expected under a rowspan header
        int rowcount = 0;
        // name of the rowspan header those sub-subcategories belong to
        string prev = "";

        OrderedDictionary categorySauce = new OrderedDictionary();
        // use strCategory, not cat - this is the display name
        categorySauce["category"] = strCategory;
        List<OrderedDictionary> subcategorySauce = new List<OrderedDictionary>();
        categorySauce["subcategories"] = subcategorySauce;
        // index of the subcategory that item rows are currently attached to
        int currentSubI = -1;

        foreach (string line in source)
        {
            // if the line starts with a blacklisted string (item or subcategory), skip it
            if (Globals.Avoid.Any(b => line.StartsWith(b)))
            {
                continue;
            }

            if (line.StartsWith("!"))
            {
                // this is a header, which means it's a subcategory
                string subcat = "";

                if (line.Contains("rowspan"))
                {
                    // this subcategory has its own children: remember how many rows
                    // follow and the name to prepend to each of them
                    Match mt = Regex.Match(line, @"(?!\! rowspan=""{0,})[0-9]+(?=""{0,}.+)");
                    if (!int.TryParse(mt.Value, out rowcount))
                    {
                        rowcount = 0;
                    }
                    mt = Regex.Match(line, @"(?<=\[\[).+(?=\]\])|(?<=\|)[A-Z][A-Za-z ]+");
                    prev = mt.Value.Trim();
                    continue;
                }

                if (rowcount > 0 && prev != "")
                {
                    // sub-subcategory, e.g. "Fishing - Bait" where this line holds "Bait"
                    Match mt = Regex.Match(line, @"(?<=\[\[)(.+\||)(.+)(?=\]\])");
                    if (mt.Success)
                    {
                        subcat = prev + " - " + mt.Groups[2].Value;
                    }
                    else
                    {
                        mt = Regex.Match(line, @"(?!\|\s*)[A-Za-z ]+");
                        subcat = prev + " - " + mt.Value;
                    }

                    rowcount--;
                    if (rowcount <= 0)
                    {
                        rowcount = 0;
                        prev = "";
                    }
                }
                else if (line.Contains("[[") && line.Contains("|"))
                {
                    // ![[Wiki Link|Display Name]]
                    subcat = Regex.Match(line, @"(?!.+\|\s*)[A-Za-z ]+").Value;
                }
                else if (line.Contains("[["))
                {
                    // ![[Wiki Link]]
                    subcat = Regex.Match(line, @"(?<=\[\[).+(?=\]\])").Value;
                }
                else if (line.Contains(cat) && !line.Contains("colspan"))
                {
                    // subcategory equals category; without colspan this is not the navbox header row
                    subcat = Regex.Match(line, @"(?!\!.+\|\s*)[A-Za-z ]+").Value;
                }
                else
                {
                    subcat = Regex.Match(line, @"(?!.+\|\s*)[A-Za-z ]+").Value;
                }

                subcat = subcat.Trim();
                if (subcat != "")
                {
                    // create a new subcategory object and populate it with its name
                    OrderedDictionary sc = new OrderedDictionary();
                    sc.Add("subcategory", subcat);
                    subcategorySauce.Add(sc);
                    currentSubI++;
                }
            }
            else if (line.StartsWith("|"))
            {
                // an item row that appears before any subcategory header has nowhere to go
                if (currentSubI < 0)
                {
                    continue;
                }
                OrderedDictionary currentSub = subcategorySauce[currentSubI];

                // if a property value is ever any of these, it is treated as blank
                string[] itemPropsBlacklist = { "", "|", "N/A", "\n" };
                List<OrderedDictionary> Items = new List<OrderedDictionary>();

                // every [[wikilink]] on the row is one item
                for (Match mt = Regex.Match(line, @"(?<=\[\[).+?(?=\]\])"); mt.Success; mt = mt.NextMatch())
                {
                    string val = mt.Value;
                    if (Globals.Avoid.Any(val.Contains))
                    {
                        continue;
                    }

                    string[] props = val.Split(new string[] { "|" }, StringSplitOptions.None);
                    OrderedDictionary itemProps = new OrderedDictionary();

                    if (val.Contains("Party Hat"))
                    {
                        // links displayed as plain "Party Hat" are skipped; the rest keep only the target
                        if (props.Length > 1 && props[1] == "Party Hat")
                        {
                            continue;
                        }
                        itemProps["name"] = props[0];
                    }
                    else if (props.Length > 1)
                    {
                        itemProps["name"] = props[0];
                        itemProps["display_name"] = props[1];
                    }
                    else
                    {
                        itemProps["name"] = val;
                    }

                    // get individual item infoboxes here
                    // NOTE(review): the returned dictionary is not merged into itemProps yet,
                    // so only "name"/"display_name" reach the output - confirm the intended merge.
                    ItemBox(itemProps["name"].ToString(), strCategory);

                    Items.Add(itemProps);
                }

                // if this is Tools > Basic, parse the tool pages separately
                if (cat == "Tools" && currentSub["subcategory"].ToString() == "Basic")
                {
                    ParseTools(ref itemPropsBlacklist, ref Items);
                }

                // make sure items are alphabetized; entries without a name sort first
                Items.Sort((a, b) => string.Compare(a["name"]?.ToString(), b["name"]?.ToString()));

                // a row without links must not wipe out items collected from an earlier row
                if (Items.Count > 0 || currentSub["items"] == null)
                {
                    currentSub["items"] = Items;
                }
            }
        }

        lstCategories.Add(categorySauce);
    }

    // emit the template attributes in key order
    OrderedDictionary templateDict = new OrderedDictionary();
    foreach (KeyValuePair<string, object> kvp in template.OrderBy(entry => entry.Key))
    {
        templateDict[kvp.Key] = kvp.Value;
    }

    sauce["template"] = templateDict;
    OrderedDictionary content = new OrderedDictionary();
    content["categories"] = lstCategories;
    sauce["content"] = content;
    string jo = JsonConvert.SerializeObject(sauce, Newtonsoft.Json.Formatting.Indented);

    // the project drive differs per development machine
    string drv = "C";
    switch (Environment.MachineName.ToLowerInvariant())
    {
        case "breve":
            drv = "D";
            break;
        case "zelda":
            drv = "F";
            break;
    }
    string dir = drv + @":\Projects\Flexbook\Test Data";

    // save a new file if files already exist: the number of existing matches is the suffix
    DirectoryInfo dataDir = new DirectoryInfo(dir);
    dataDir.Create(); // no-op when the folder is already there
    int fileCount = dataDir.GetFiles("stardewsample-all*.json").Length;
    File.WriteAllText(Path.Combine(dir, "stardewsample-all-" + fileCount + ".json"), jo);

    // build the template key file
    string jtk = JsonConvert.SerializeObject(templateKeys, Newtonsoft.Json.Formatting.Indented);
    File.WriteAllText(Path.Combine(dir, "stardewsample-all-" + fileCount + "-template-keys.json"), jtk);
}
/// <summary>
/// Builds the property dictionary for one wiki item from the infobox found in the first
/// section of its page, then derives extra keys: description, professions, quality prices,
/// artisan-good prices and the prices of the product listed in Globals.ArtisanGoods.
/// </summary>
/// <param name="strItem">Wiki page title of the item.</param>
/// <param name="strCategory">Display name of the category being processed; "Fish" assigns the fishing professions.</param>
/// <returns>
/// The item's properties in insertion order, or null when the item is listed in
/// Globals.Avoid or equals Globals.Categories[0].
/// </returns>
// TODO (from the original author): this should become a void that adds to the caller's
// item dictionary, with refs to template and templateKeys.
OrderedDictionary ItemBox(string strItem, string strCategory)
{
    // skip if the node exists in our blacklist
    if (Globals.Avoid.Contains(strItem) || strItem == Globals.Categories[0])
    {
        return null;
    }

    // first section of the item's wiki page - this contains the infobox
    string strWiki = Wikidata(strItem, "item");

    // strip out the content between the starting {{ and the first line break
    string strRes = new Regex("Infobox.*").Replace(strWiki, "");

    // strip out everything past the closing }}, which is always preceded by a newline;
    // the extra pipe makes the closing braces their own (ignored) split entry
    int intStart = strRes.IndexOf("\n}}", StringComparison.Ordinal);
    if (intStart >= 0)
    {
        strRes = strRes.Substring(0, intStart) + "\n|}}";
    }

    // infobox parameters whose values are lists of links/templates
    string[] listProps =
    {
        "source", "as", "dr", "md", "os",
        "ingredients", "tingredients", "season", "produce", "drops",
        "location", "occupants", "materials", "animals",
        "favorites", "family", "friends",
        "buff", "seed"
    };
    char[] strTrim = { ' ', '\n' };

    OrderedDictionary itm = new OrderedDictionary();
    itm["name"] = strItem;

    // each infobox parameter starts on its own line with a leading pipe
    foreach (string s in strRes.Split(new[] { "\n|" }, StringSplitOptions.RemoveEmptyEntries))
    {
        string[] strElements = s.Split(new[] { "=" }, 2, StringSplitOptions.RemoveEmptyEntries);
        if (strElements.Length < 2)
        {
            // no "name = value" pair on this line
            continue;
        }

        string propertyName = strElements[0].Trim(strTrim);
        string rawValue = strElements[1].Trim(strTrim);
        object propertyValue;

        if (listProps.Contains(propertyName))
        {
            // abbreviated infobox names get readable keys
            switch (propertyName)
            {
                case "source":
                    propertyName = "sources";
                    break;
                case "as":
                    propertyName = "wigglies";
                    break;
                case "dr":
                    propertyName = "reward";
                    break;
                case "md":
                    propertyName = "drops";
                    break;
                case "os":
                    propertyName = "othersrc";
                    break;
            }

            List<string> sourceList = new List<string>();
            if (s.Contains("[[") || s.Contains("{{"))
            {
                // the whole line is passed so parseSource can see the parameter name
                parseSource(s, "[[", "]]", ref sourceList);
                parseSource(s, "{{", "}}", ref sourceList);
            }
            else
            {
                sourceList.Add(rawValue);
            }
            sourceList.Sort();
            propertyValue = sourceList.ToArray();
        }
        else if (propertyName == "profession")
        {
            // comma-separated list used to calculate prices; entries are trimmed so that
            // "Rancher, Artisan" yields names usable as lookup keys
            propertyValue = rawValue
                .Split(',')
                .Select(profession => profession.Trim())
                .Where(profession => profession.Length > 0)
                .ToArray();
        }
        else
        {
            propertyValue = parseString(rawValue);
        }

        // fix price key name
        if (propertyName == "price")
        {
            propertyName = "sellprice";
        }

        // strip double apostrophes (wiki italics) from strings
        if (propertyValue is string text)
        {
            propertyValue = text.Replace("''", "");
        }

        itm[propertyName] = propertyValue;
    }

    // add a description by expanding {{Description|<item>}}
    itm["description"] = Wikidata("{{Description|" + strItem + "}}", "wikitext");

    // add profession property where necessary
    if (strItem == "Dinosaur Egg")
    {
        itm["profession"] = new string[] { "Rancher", "Artisan" };
    }
    // if it's a fish, add the right professions
    if (strCategory == "Fish")
    {
        itm["profession"] = new string[] { "Fisher", "Angler" };
    }

    // calculate quality prices
    if (itm["quality"] as string == "true" || itm["iridium"] as string == "true")
    {
        GetPrices(ref itm);
    }

    // calculate price variants for every profession that affects this item
    if (itm["profession"] is string[] professions && itm["sellprice"] != null)
    {
        foreach (string profession in professions)
        {
            GetPrices(ref itm, profession.ToLower());
        }
    }

    // add a type of vegetable-p to hops, wheat, and tea leaves
    if (new[] { "Hops", "Wheat", "Tea Leaves" }.Contains(strItem))
    {
        itm["type"] = "vegetable-p";
    }
    // add a type of vegetable to corn
    if (strItem == "Corn")
    {
        itm["type"] = "vegetable";
    }
    // add a type of flower to members of Flowers
    if (Globals.Flowers.Contains(strItem))
    {
        itm["type"] = "flower";
    }

    // 1.4x price multiplier, truncated; decimal math keeps exact results
    // such as 45 * 1.4 = 63 from being truncated to 62 as double math would
    string artPrice(int basePrice)
    {
        return Math.Truncate(basePrice * 1.4m).ToString();
    }

    if (itm["type"] != null)
    {
        string type = itm["type"].ToString();
        // get the base sell price
        int sprice = Convert.ToInt32(itm["sellprice"]);

        if (type == "fruit")
        {
            // jelly = (base * 2) + 50
            int jelly = (sprice * 2) + 50;
            itm["price_jelly"] = jelly.ToString();
            itm["price_jelly_artisan"] = artPrice(jelly);
            // wine = base * 3, available in all qualities
            GetPrices(ref itm, "wine_");
        }
        else if (type.StartsWith("vegetable"))
        {
            // pickles = (base * 2) + 50
            int pickles = (sprice * 2) + 50;
            itm["price_pickles"] = pickles.ToString();
            itm["price_pickles_artisan"] = artPrice(pickles);

            if (type == "vegetable")
            {
                // juice = base * 2.25 (the pickles formula was used here by mistake)
                int juice = (int)Math.Truncate(sprice * 2.25m);
                itm["price_juice"] = juice.ToString();
                itm["price_juice_artisan"] = artPrice(juice);
            }
        }
        else if (type == "flower")
        {
            // honey = 100g + (base * 2)
            int honey = 100 + (sprice * 2);
            itm["price_honey"] = honey.ToString();
            itm["price_honey_artisan"] = artPrice(honey);
        }
    }

    // if there's an artisan good associated with this item,
    // fetch that good's data and copy its prices onto the item
    if (Globals.ArtisanGoods.Contains(itm["name"]))
    {
        string ArtGoodName = Globals.ArtisanGoods[itm["name"].ToString()].ToString();
        itm["artisan_good"] = ArtGoodName;

        // null when the artisan good itself is blacklisted
        OrderedDictionary ArtGood = ItemBox(ArtGoodName, strCategory);
        if (ArtGood != null)
        {
            List<string> priceKeys = ArtGood.Keys
                .Cast<object>()
                .Select(key => key.ToString())
                .Where(key => key.Contains("price_"))
                .ToList();

            foreach (string k in priceKeys)
            {
                // stored under a generic "product" segment rather than the good's name
                string theKey = k.Replace("price_", "price_product_");
                itm[theKey] = Convert.ToInt32(ArtGood[k]).ToString();
            }
        }
    }

    return itm;
}
/// <summary>
/// Appends one price entry per Globals.Quality tier to the item, derived from its "sellprice".
/// Call as GetPrices(ref item) for plain quality prices, GetPrices(ref item, profession) to apply
/// the multiplier stored in Globals.Professions, or pass "wine_" for wine pricing (x3), which also
/// adds a second set of "artisan_" entries at 1.4 times the wine price.
/// </summary>
/// <param name="itm">Item dictionary that is read ("sellprice", "iridium") and extended in place.</param>
/// <param name="pro">Optional profession key, or "wine_"; null for plain quality prices.</param>
void GetPrices(ref OrderedDictionary itm, string pro = null)
{
    int basePrice = Convert.ToInt32(itm["sellprice"]);
    bool wine = pro == "wine_";

    // multiplier and key prefix depend on what is being priced
    decimal factor = 1;
    string prefix = "price_";
    if (wine)
    {
        factor *= 3;
        prefix += pro;
    }
    else if (pro != null)
    {
        factor *= Convert.ToDecimal(Globals.Professions[pro]);
        prefix += pro.ToLower() + "_";
    }

    // one entry per quality tier: <prefix><tier index>_<tier name>
    int tier = 0;
    foreach (DictionaryEntry grade in Globals.Quality)
    {
        string entryKey = prefix + tier + "_" + grade.Key;
        decimal scaled = basePrice * Convert.ToDecimal(grade.Value) * factor;
        itm[entryKey] = Math.Truncate(scaled).ToString();
        tier++;
    }

    // wine additionally gets artisan prices computed from the wine prices just stored
    if (wine)
    {
        tier = 0;
        foreach (DictionaryEntry grade in Globals.Quality)
        {
            string tierSuffix = tier + "_" + grade.Key;
            decimal winePrice = Convert.ToDecimal(itm["price_wine_" + tierSuffix]);
            decimal boosted = winePrice * Convert.ToDecimal(1.4);
            itm[prefix + "artisan_" + tierSuffix] = Math.Truncate(boosted).ToString();
            tier++;
        }
    }

    // items not flagged as iridium lose the "3_iridium" entry under this prefix
    bool flaggedIridium = (string)itm["iridium"] == "true";
    if (!flaggedIridium)
    {
        itm.Remove(prefix + "3_iridium");
    }
}
/// <summary>
/// Appends edibility information to the item: GetBuffs(ref item).
/// Sets "edible" to "true"/"false" from the "edibility" score and, for edible items, adds one
/// "energy_..." entry per Globals.BuffQuality tier (plus a "health_..." entry when the score
/// is positive). A score of exactly 0 only adds "energy_base" = 0.
/// </summary>
/// <param name="itm">Item dictionary that is read ("edibility") and extended in place.</param>
void GetBuffs(ref OrderedDictionary itm)
{
    int edibility = Convert.ToInt32(itm["edibility"]);

    // scores of -300 and below mark the item as inedible
    if (edibility <= -300)
    {
        itm["edible"] = "false";
        return;
    }

    itm["edible"] = "true";

    if (edibility == 0)
    {
        // edible but restores nothing
        itm["energy_base"] = 0;
        return;
    }

    bool restoresHealth = edibility > 0;
    foreach (DictionaryEntry tier in Globals.BuffQuality)
    {
        string tierName = tier.Key.ToString();
        int multiplier = Convert.ToInt32(tier.Value);
        string suffix = tier.Value + "_" + tierName;

        // energy = ceil(edibility * 2.5) + edibility * quality multiplier
        int energy = Convert.ToInt16(Math.Ceiling(edibility * 2.5) + edibility * multiplier);

        if (restoresHealth)
        {
            // health is 45% of the energy value, rounded toward zero
            int health = Convert.ToInt16(Math.Truncate(energy * 0.45));
            itm["energy_" + suffix] = energy;
            itm["health_" + suffix] = health;
        }
        else
        {
            // negative scores only affect energy
            itm["energy_" + suffix] = energy;
        }
    }
}
/// <summary>
/// Returns a string of wikitext (or an image URL) from the Stardew Valley Wiki API by search
/// string and query type.
/// </summary>
/// <param name="strInput">
/// Page title ("item"), file name without the "File:" prefix ("image"), navbox suffix appended
/// to "Template:Navbox" ("category"), or raw wikitext to expand ("wikitext"). Pass it unescaped;
/// it is URL-escaped here.
/// </param>
/// <param name="strType">One of "item", "image", "category" or "wikitext".</param>
/// <returns>
/// The requested text, or an empty string when <paramref name="strType"/> is not recognised or
/// the response does not contain the expected element (for example a missing page).
/// </returns>
String Wikidata(string strInput, string strType)
{
    const string api = "https://stardewvalleywiki.com/mediawiki/api.php";

    // escape the caller's text so characters such as &, #, + or | cannot alter the query string
    string escaped = Uri.EscapeDataString(strInput ?? "");

    string strURL;
    switch (strType)
    {
        case "item":
            // "[query][pages][$pageid$][revisions][0][*]"
            strURL = api + "?action=query&prop=revisions&rvprop=content&format=json&rvsection=0&titles=" + escaped;
            break;
        case "image":
            // "[query][pages][$pageid$][imageinfo][0][url]" - the actual image file from the wiki
            strURL = api + "?action=query&prop=imageinfo&iiprop=url&continue=&format=json&titles=File:" + escaped;
            break;
        case "category":
            // "[query][pages][$pageid$][revisions][0][*]"
            strURL = api + "?action=query&prop=revisions&rvprop=content&format=json&rvsection=0&titles=Template:Navbox" + escaped;
            break;
        case "wikitext":
            // "[expandtemplates][wikitext]"
            strURL = api + "?action=expandtemplates&prop=wikitext&format=json&text=" + escaped;
            break;
        default:
            // nothing sensible to request for an unknown type
            return "";
    }

    string raw;
    using (WebClient client = new WebClient())
    {
        // the API answers in UTF-8; WebClient would otherwise use the system default encoding
        client.Encoding = System.Text.Encoding.UTF8;
        raw = client.DownloadString(strURL);
    }
    JObject obj = JObject.Parse(raw);

    if (strType == "wikitext")
    {
        return obj["expandtemplates"]?["wikitext"]?.ToString() ?? "";
    }

    // the page object is keyed by its page id, which is not known up front: take the first one
    JObject pages = obj["query"]?["pages"] as JObject;
    JToken page = pages?.Properties().FirstOrDefault()?.Value;
    if (page == null)
    {
        return "";
    }

    JToken value = strType == "image"
        ? page["imageinfo"]?.First?["url"]
        : page["revisions"]?.First?["*"];
    return value?.ToString() ?? "";
}
/// <summary>
/// Special parser for tool information. Downloads section 1 of each tool group page
/// (Axes, Hoes, Pickaxes, Trash Cans, Watering Cans), reads its wikitable and appends one
/// dictionary per table row to <paramref name="items"/>, keyed by the lower-cased column headers
/// (the "improvements" column is stored as "notes").
/// </summary>
/// <param name="blacklist">Raw cell values that are stored as a single space instead of being parsed.</param>
/// <param name="items">List that receives the parsed tool rows.</param>
void ParseTools(ref string[] blacklist, ref List<OrderedDictionary> items)
{
    // tool groups
    string[] strTools = { "Axes", "Hoes", "Pickaxes", "Trash Cans", "Watering Cans" };

    foreach (string strTool in strTools)
    {
        // the URL to hit for information
        string strURL = "https://stardewvalleywiki.com/mediawiki/api.php?action=query&prop=revisions&rvprop=content&format=json&rvsection=1&titles=" + Uri.EscapeDataString(strTool);

        // the raw JSON output from the API response
        string toolRaw;
        using (WebClient client = new WebClient())
        {
            client.Encoding = System.Text.Encoding.UTF8;
            toolRaw = client.DownloadString(strURL);
        }

        // the page object is keyed by its page id, so take the first property
        JObject toolPage = JObject.Parse(toolRaw)["query"]?["pages"] as JObject;
        JToken page = toolPage?.Properties().FirstOrDefault()?.Value;
        string strInput = page?["revisions"]?.First?["*"]?.ToString() ?? "";

        // only wikitable markup can be converted into objects
        if (!strInput.Contains("wikitable"))
        {
            continue;
        }

        // the column headers are the object properties; the position of the last header
        // tells us where the table content starts
        List<string> lstHeaders = new List<string>();
        int hIdx = 0;
        int hLen = 0;
        for (Match header = Regex.Match(strInput, @"(?<=\n\!)(.+)"); header.Success; header = header.NextMatch())
        {
            string val = header.Value.Trim().ToLower();
            hLen = header.Length;
            hIdx = header.Index;
            lstHeaders.Add(val == "improvements" ? "notes" : val);
        }

        // +4 for the \n|-\n that follows the header line
        int startI = hIdx + hLen + 4;
        if (lstHeaders.Count == 0 || startI >= strInput.Length)
        {
            // no headers or no rows after them: nothing to extract from this page
            continue;
        }

        // this is the wikitext for the table rows, split by the markup for a new row: |-
        string wikiRows = strInput.Substring(startI).Trim();
        foreach (string row in wikiRows.Split(new string[] { "\n|-\n" }, StringSplitOptions.None))
        {
            OrderedDictionary tool = new OrderedDictionary();
            int toolIndex = 0;

            // cells are split by regex because the affected pages put each cell on its own line
            foreach (string rowcol in Regex.Split(row, @"\n\|"))
            {
                // rows with more cells than headers have no key for the extra cells
                if (toolIndex >= lstHeaders.Count)
                {
                    break;
                }

                string newval = rowcol.StartsWith("|") ? rowcol.Substring(1) : rowcol;
                string outval = "";

                if (blacklist.Contains(rowcol))
                {
                    // this is an empty column, use a space
                    outval = " ";
                }
                else if (newval.StartsWith("}"))
                {
                    // this is the end of the table, so skip it
                    continue;
                }
                else if (newval.Contains("["))
                {
                    // a link of some kind:
                    //  - file link: keep the image name and discard anything after the first match
                    //  - piped link: keep the display name
                    //  - plain link: keep the text as-is (brackets are stripped below)
                    string tempval = newval;
                    for (Match link = Regex.Match(newval, @"(?<=\[\[).+(?=\]\])"); link.Success; link = link.NextMatch())
                    {
                        string linktext = link.Value;
                        if (linktext.Contains("File:"))
                        {
                            Match fileMts = Regex.Match(linktext, @"(?<=File:)[\w .]+");
                            tempval = newval.Replace(linktext, fileMts.Value);
                        }
                        else if (linktext.Contains("|"))
                        {
                            string[] linkProps = linktext.Split(new string[] { "|" }, StringSplitOptions.None);
                            if (linkProps[0].Contains("File"))
                            {
                                // this is the image name
                                Match fileMts = Regex.Match(linkProps[0], @"(?<=File:).+");
                                tempval = newval.Replace(linkProps[0], fileMts.Value);
                            }
                            else
                            {
                                tempval = newval.Replace(linktext, linkProps[1]);
                            }
                        }
                    }
                    outval = tempval.Replace("[", "").Replace("]", "");
                }
                else if (newval.Contains("{"))
                {
                    // see if it's a template and extract its data
                    for (Match tpl = Regex.Match(newval, @"(?<=\{\{).+(?=\}\})"); tpl.Success; tpl = tpl.NextMatch())
                    {
                        string[] splitVals = tpl.Value.Split(new string[] { "|" }, StringSplitOptions.None);
                        if (splitVals.Length == 3)
                        {
                            // three members: output like "Copper Bar (5)"
                            outval = splitVals[1] + " (" + splitVals[2] + ")";
                        }
                        else if (splitVals.Length == 2)
                        {
                            // two members: output the second
                            outval = splitVals[1];
                        }
                        // any other shape is unexpected and leaves the value empty
                    }
                }
                else
                {
                    // use the value as-is
                    outval = newval;
                }

                // clean up extraneous wikitext markup
                outval = outval.Replace("'''", "").Replace("''", "").Replace("\n\n", "\n");

                // the indexer overwrites instead of throwing when two headers share a name
                tool[lstHeaders[toolIndex]] = outval;
                toolIndex++;
            }

            // a row that produced no cells (e.g. only the table terminator) is not a tool
            if (tool.Count == 0)
            {
                continue;
            }

            Console.WriteLine(tool["name"]);
            tool.Dump();
            items.Add(tool);
        }
    }
}
/// <summary>
/// Single-line string parser. HTML-decodes the value, reduces [[wikilinks]] to plain text and
/// renders the supported {{templates}} (name, NPC, Price, Description) as readable strings.
/// </summary>
/// <param name="s">Raw infobox value; null is returned unchanged.</param>
/// <returns>
/// The cleaned value. When the value contains templates, the result is the rendering of the
/// last template only (text around the template is dropped).
/// </returns>
string parseString(string s)
{
    if (s == null)
    {
        return null;
    }

    // replace all HTML entities with characters; all matching below runs on the decoded
    // text so that every match can actually be found again when replacing inside result
    string decoded = HttpUtility.HtmlDecode(s);
    string result = decoded;

    if (decoded.Contains("[["))
    {
        // skill requirements: [[File:Fishing Skill Icon.png|24px|link=]] [[Fishing]] Level 2
        // becomes "Fishing Level 2". This has to run on the whole value because the pattern
        // spans two links.
        result = Regex.Replace(result, @"\[\[File.+?\]\] \[\[(.+)\]\]( Level [0-9]+)", "$1$2", RegexOptions.IgnoreCase);

        // .+? means lazy (ungreedy) matching, so every link is visited on its own
        for (Match m = Regex.Match(decoded, @"\[\[(.+?)\]\]"); m.Success; m = m.NextMatch())
        {
            string inner = m.Groups[1].Value;

            // [[File:Shirt001.png|center]] / [[File:Axe.png]] -> the file name
            Match file = Regex.Match(m.Value, @"\[\[File:([\w -_.]+).*?\]\]", RegexOptions.IgnoreCase);
            if (file.Success)
            {
                result = result.Replace(file.Value, file.Groups[1].Value);
            }
            else if (inner.Contains("|"))
            {
                // [[Random Events#Meteorite|meteorite]] -> the display text
                result = result.Replace(m.Value, inner.Split('|')[1]);
            }
            else
            {
                // [[The Mines]] -> the link target
                result = result.Replace(m.Value, inner);
            }
        }
    }

    if (decoded.Contains("{{"))
    {
        // the for-loop advances to the next match on every path, including "continue"
        for (Match m = Regex.Match(decoded, @"\{\{(.+?)\}\}"); m.Success; m = m.NextMatch())
        {
            // this is the raw wikitext of the match
            string wikitext = m.Value;
            // this is the string minus the start and end brackets; return it if all else fails
            // NOTE(review): this replaces the whole result, not just the template - confirm
            // that values never carry meaningful text around a template.
            string input = m.Groups[1].Value;
            result = input;

            // all {{wikitext templates}} use pipe as the delimiter
            string[] segments = input.Split(new string[] { "|" }, StringSplitOptions.RemoveEmptyEntries);
            if (segments.Length < 2)
            {
                // a template without parameters is kept as its bare name
                continue;
            }

            // for case-insensitive comparisons
            string templateName = segments[0].ToLower();
            string littletext = wikitext.ToLower();

            if (templateName == "name")
            {
                if (segments.Length == 2)
                {
                    // return second segment as-is
                    result = segments[1];
                }
                else if (littletext.Contains("level"))
                {
                    // {{name|Farming|Level 3|image=Farming Skill Icon.png}} -> Farming Level 3
                    result = segments[1] + " " + segments[2];
                }
                else if (littletext.Contains("+"))
                {
                    // {{name|Defense|+3}} -> +3 Defense
                    result = segments[2] + " " + segments[1];
                }
                else
                {
                    // {{name|Omni Geode|50|...}} -> Omni Geode (50); later segments are ignored
                    result = segments[1] + " (" + segments[2] + ")";
                }
            }
            else if (templateName == "npc")
            {
                // {{NPC|Jodi|Mother}} -> Jodi (Mother)
                result = segments.Length > 2
                    ? segments[1] + " (" + segments[2] + ")"
                    : segments[1];
            }
            else if (templateName == "price")
            {
                // {{Price|30|Currency}}: JOPK, Qi, Star Token; without a currency this is regular gold
                result = segments[1];
                if (segments.Length > 2)
                {
                    switch (segments[2])
                    {
                        case "JOPK":
                            result += " tokens";
                            break;
                        case "Qi":
                            result += " Qi coins";
                            break;
                        case "Token":
                            result += " star tokens";
                            break;
                    }
                }
                else
                {
                    result += "g";
                }
            }
            else if (templateName == "description")
            {
                // {{Description|Wild Horseradish}} / {{Description|Recipe|Lucky Lunch}}
                // expanded by the wiki itself through the expandtemplates API
                result = Wikidata(wikitext, "wikitext");
            }
            else
            {
                // unsupported template: flag it in the output
                result += " ***BAD INPUT***";
            }
        }
    }

    // clean <p> tags from result
    // NOTE(review): @"\n" is a literal backslash followed by 'n', not a line break - confirm this is intended.
    result = result.Replace("<p>", @"\n");
    result = result.Replace("</p>", "");
    return result;
}
/// <summary>
/// String parser for delimited lists. Extracts every span enclosed by <paramref name="strStart"/>
/// and <paramref name="strEnd"/> from <paramref name="strInput"/> and appends a cleaned-up entry
/// for each one to <paramref name="lstSauce"/>.
/// </summary>
/// <param name="strInput">Infobox line ("name = value") to scan; the parameter name is used to pick the entry format.</param>
/// <param name="strStart">Opening delimiter, e.g. "[[" or "{{".</param>
/// <param name="strEnd">Closing delimiter, e.g. "]]" or "}}".</param>
/// <param name="lstSauce">List that receives the extracted entries.</param>
static void parseSource(string strInput, string strStart, string strEnd, ref List<string> lstSauce)
{
    if (string.IsNullOrEmpty(strInput))
    {
        return;
    }

    // the Hat Mouse shop is only identifiable by its icon; record it once and stop
    if (strInput.Contains("Abandoned House Icon.png") && !lstSauce.Contains("Hat Mouse"))
    {
        lstSauce.Add("Hat Mouse");
        return;
    }

    // clean string of extraneous characters; bold (''') must go before italics ('')
    // or a stray apostrophe is left behind
    strInput = strInput.Replace("{{!}}", "").Replace("'''", "").Replace("''", "");

    while (true)
    {
        // trim any whitespace
        strInput = strInput.Trim();

        // find the first instance of the start characters
        int start = strInput.IndexOf(strStart, StringComparison.Ordinal);
        if (start < 0)
        {
            break;
        }

        // find the matching end characters, looking only behind the start
        int innerStart = start + strStart.Length;
        int close = strInput.IndexOf(strEnd, innerStart, StringComparison.Ordinal);
        if (close < 0)
        {
            // unterminated span: nothing more can be extracted
            break;
        }

        // extract the text between the delimiters, then remove the whole span from the input
        string sub = strInput.Substring(innerStart, close - innerStart);
        strInput = strInput.Remove(start, close + strEnd.Length - start);

        //"name|Fishing|Level 2|image=Fishing Skill Icon.png"
        //"name|Bug Meat|1"
        if (sub.Contains("|"))
        {
            string[] subSplit = sub.Split(new string[] { "|" }, StringSplitOptions.RemoveEmptyEntries);
            if (subSplit.Length == 0)
            {
                // only pipes: no usable text
                continue;
            }

            if (subSplit.Length == 1)
            {
                // "Foo|" or "|Foo": the single remaining segment is the entry
                sub = subSplit[0];
            }
            else if (sub.Contains("Bundle"))
            {
                sub = subSplit[1] + " Bundle";
            }
            else if (strInput.Contains("ingredients"))
            {
                // ingredient name followed by its quantity, e.g. "Bug Meat (1)"
                sub = subSplit[1];
                if (subSplit.Length == 3)
                {
                    sub += " (" + subSplit[2] + ")";
                }
            }
            else if (strInput.Contains("recipe"))
            {
                // recipes are kept raw and echoed for inspection
                Console.Write(sub + " | ");
            }
            else
            {
                sub = subSplit[1];
            }
        }

        lstSauce.Add(sub);
    }
}
/// <summary>
/// Legacy parser for single-line wikitext values. Tries a fixed sequence of patterns and
/// returns the plain-text form produced by the first one that matches; when nothing matches
/// the input is returned unchanged.
/// </summary>
/// <param name="propertyValue">Raw wikitext value. Null or empty input is returned as-is.</param>
/// <returns>The simplified display string.</returns>
/// <remarks>
/// This needs reworking: the patterns are order-dependent and overlap. Each pattern is only
/// evaluated once the earlier ones have failed. Plain quantities such as
/// <c>{{name|Omni Geode|50}}</c> have no dedicated handling and fall through to the generic
/// <c>{{name|...}}</c> pattern, which yields just the name.
/// </remarks>
string parseStringOld(string propertyValue)
{
    if (string.IsNullOrEmpty(propertyValue))
    {
        return propertyValue;
    }

    // {{name|Skill|Level #|image=...}}  ->  "Skill Level #"
    Match skillTemplate = Regex.Match(propertyValue, @"(?<={{name\|)[\w\s']+\|Level [0-9]+(?=\|.+)");
    if (skillTemplate.Success)
    {
        return skillTemplate.Value.Replace("|", " ");
    }

    // [[File:...]] [[Skill]] Level #  ->  "Skill Level #"
    Match skillLink = Regex.Match(propertyValue, @"(?!\[\[File.+\]\] \[\[)\w+\]\] Level [0-9]+", RegexOptions.IgnoreCase);
    if (skillLink.Success)
    {
        return skillLink.Value.Replace("]]", "");
    }

    // {{NPC|...|...[[..]]}}
    // {{NPC|...|...}}  ->  "Name - Detail"
    Match npc = Regex.Match(propertyValue, @"(?<=\{\{NPC\|)[A-Za-z0-9 \-\+\|]+(?=\[\[|\}\})");
    if (npc.Success)
    {
        string npcText = npc.Value.Replace("|", " - ");
        if (npcText.Contains("+"))
        {
            // a signed number gets a unit suffix, separated by a space
            npcText += " hearts";
        }
        return npcText;
    }

    // {{description|...}}
    // NOTE: the lookahead has an empty first alternative, so it always succeeds; the match
    // simply ends at the first character outside the character class.
    Match description = Regex.Match(propertyValue, @"(?<=\{\{description\|)[A-Za-z0-9 \-\+\|]+(?=|\}\})", RegexOptions.IgnoreCase);
    if (description.Success)
    {
        return description.Value;
    }

    // clean up wikilink syntax; .+? means lazy (ungreedy) matching
    Match link = Regex.Match(propertyValue, @"(?<=\[\[).+?(?=\]\])");
    if (link.Success)
    {
        string stripped = propertyValue;
        for (; link.Success; link = link.NextMatch())
        {
            string linkText = link.Value;
            if (linkText.Contains("File:"))
            {
                // [[File:Name.png|...]] -> keep only the file name
                Match fileName = Regex.Match(linkText, @"(?<=File:)[\w .]+");
                stripped = stripped.Replace(linkText, fileName.Value);
            }
            else if (linkText.Contains("|"))
            {
                // [[Target|Label]] -> keep the second pipe-separated segment;
                // Contains("|") guarantees at least two segments
                string[] linkProps = linkText.Split(new[] { "|" }, StringSplitOptions.None);
                stripped = stripped.Replace(linkText, linkProps[1]);
            }
        }
        return stripped.Replace("[", "").Replace("]", "");
    }

    // remove wikilink brackets (reached only when the pattern above did not match,
    // e.g. link text spanning a line break)
    Match bracket = Regex.Match(propertyValue, @"(?<=\[\[)[\w\s']+(?=\]\])");
    if (bracket.Success)
    {
        return propertyValue.Replace("[", "").Replace("]", "");
    }

    // {{name|Defense|+3}}  ->  "+3 Defense"; there might be more than one
    Match stat = Regex.Match(propertyValue, @"(?<=\{\{name\|)[\w\s']+\|\+[0-9]+(?=\}\})");
    if (stat.Success)
    {
        List<string> stats = new List<string>();
        for (; stat.Success; stat = stat.NextMatch())
        {
            // split value by pipe and display as value + " " + name
            string[] props = stat.Value.Split(new[] { "|" }, StringSplitOptions.None);
            stats.Add((props[1] + " " + props[0]).Replace("{", "").Replace("}", ""));
        }
        // output as multiline string
        return String.Join("\n", stats);
    }

    // {{price|15000}}  ->  "15000"
    Match price = Regex.Match(propertyValue, @"(?<=\{\{price\|)[\w\s']+(?=\}\})");
    if (price.Success)
    {
        string priceTemplate = "{{price|" + price.Value + "}}";
        return propertyValue.Replace(priceTemplate, price.Value);
    }

    // {{name|...}}  ->  name only
    Match name = Regex.Match(propertyValue, @"(?<=\{\{name\|)[\w\s']+(?=\||\})");
    if (name.Success)
    {
        return name.Value;
    }

    return propertyValue;
}
/// <summary>
/// global constants for different functions
/// </summary>
public class Globals
{
    /// <summary>
    /// list of blacklisted items by name - these will be skipped
    /// </summary>
    public static List<String> Avoid = new List<String>
    {
        // wikitext characters
        "<", "{", "|-", "|}",
        // ignored items
        "Brown Egg",
        "Animals#",
        "Stone Owl",
        "Panda Hat",
        // these tools are parsed separately
        "Axes",
        "Hoes",
        "Pickaxes",
        "Trash Cans",
        "Watering Cans",
        // these are broken and need to be fixed
        "Coop",
        "Barn",
        "Haunted Skull",
        "Jellies and Pickles",
        "Shed",
        "Slime"
    };

    /// <summary>
    /// list of categories for navbox wikitext parsing;
    /// the trailing comments give each entry's zero-based index
    /// </summary>
    public static string[] Categories =
    {
        "Animals",          // 0
        "Artifacts",        // 1
        "Artisan Goods",    // 2
        "Buildings",        // 3
        "Clothing",         // 4
        "Crop",             // 5
        "Decor",            // 6
        "Equipment",        // 7
        "Fish",             // 8
        "Foraging",         // 9
        "Furniture",        // 10
        "Ingredients",      // 11
        "Lighting",         // 12
        "Minerals",         // 13
        "Monsters",         // 14
        "Recipes",          // 15
        "Resources",        // 16
        "Seeds",            // 17
        "Tree",             // 18
        "Tools",            // 19
        "Villagers",        // 20
        "Warp Totems",      // 21
        "Weapons"           // 22
    };

    /// <summary>
    /// Price multiplier for each Quality rating.
    /// Values are boxed as written: whole numbers are int, fractional ones are double.
    /// </summary>
    public static OrderedDictionary Quality = new OrderedDictionary
    {
        { "base", 1 },
        { "silver", 1.25 },
        { "gold", 1.5 },
        { "iridium", 2 }
    };

    /// <summary>
    /// Integer value associated with each Quality rating for buffs
    /// </summary>
    public static OrderedDictionary BuffQuality = new OrderedDictionary
    {
        { "base", 0 },
        { "silver", 1 },
        { "gold", 2 },
        { "iridium", 4 }
    };

    /// <summary>
    /// Price multiplier for each Profession
    /// </summary>
    public static OrderedDictionary Professions = new OrderedDictionary
    {
        { "artisan", 1.4 },
        { "rancher", 1.2 },
        { "gemologist", 1.3 },
        { "tiller", 1.1 },
        { "blacksmith", 1.5 },
        { "forester", 1.25 },
        { "fisher", 1.25 },
        { "angler", 1.5 }
    };

    /// <summary>
    /// Produce-to-ArtisanGood relationship (key: produce, value: artisan good)
    /// </summary>
    public static OrderedDictionary ArtisanGoods = new OrderedDictionary
    {
        { "Hops", "Pale Ale" },
        { "Wheat", "Beer" },
        { "Honey", "Mead" },
        { "Milk", "Cheese" },
        { "Large Milk", "Cheese" },
        { "Goat Milk", "Goat Cheese" },
        { "Large Goat Milk", "Goat Cheese" },
        { "Coffee Bean", "Coffee" },
        { "Tea Leaves", "Green Tea" },
        { "Wool", "Cloth" },
        { "Egg", "Mayonnaise" },
        { "Large Egg", "Mayonnaise" },
        { "Void Egg", "Void Mayonnaise" },
        { "Dinosaur Egg", "Dinosaur Mayonnaise" },
        { "Truffle", "Truffle Oil" },
        { "Corn", "Oil" },
        { "Sunflower", "Oil" },
        { "Sunflower Seeds", "Oil" },
        { "Sturgeon Roe", "Caviar" },
        { "Roe", "Aged Roe" },
    };

    /// <summary>
    /// List of flowers for honey
    /// </summary>
    public static List<string> Flowers = new List<string>
    {
        "Blue Jazz",
        "Fairy Rose",
        "Poppy",
        "Summer Spangle",
        "Sunflower",
        "Tulip"
    };
}