// Master switch for the "aggressive" rules (plain-text-to-listing conversion and
// listing address clean-up). Implementing the "aggressive" rules is more likely
// to produce false positives.
private static readonly bool AGGRESSIVE = true;

// ---- listing templates -------------------------------------------------

// matches any {{listing}}, {{see}}, {{do}}, {{buy}}, {{eat}}, {{drink}} or {{sleep}} template call
private static readonly Regex ListingTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("listing,see,do,buy,eat,drink,sleep".Split(',')));
// the recognised listing parameters, in the order they are written out when a listing is formatted
private static readonly string[] ListingTemplateParamNames = {"type","name","alt","url","email","address","lat","long","directions","phone","tollfree","fax","image","hours","checkin","checkout","price","lastedit","content"};
// the listing parameters that hold phone numbers
private static readonly string[] ListingPhoneParamNames = {"phone","fax","tollfree"};
// names of sections whose plain-text list items may be converted to listings
private static readonly Regex TextToListingSectionNames = new Regex(@"(get in|get around|see|do|buy|eat|drink|sleep)", RegexOptions.IgnoreCase);
private static readonly Regex PhoneNumberRegex = new Regex(@"'*[\d\+\(][\s\d\(\)\-\.'\+]+[\d\)]'*");
private static readonly Regex MailtoRegex = new Regex(@"mailto:[/]*", RegexOptions.IgnoreCase);
private static readonly Regex ExcessWhitespaceRegex = new Regex(@"\s\s+", RegexOptions.Singleline);

// ---- URLs, names and addresses -----------------------------------------

// "http://www.example.com"
private static readonly string ValidUrlPattern = @"((http(s)?:)//)?(([a-z]+\.)+)([a-z]+)";
private static readonly Regex ValidUrlRegex = new Regex(ValidUrlPattern, RegexOptions.Singleline | RegexOptions.IgnoreCase);
// "[http://www.example.com]" - group 1 is the URL without the brackets
private static readonly Regex FootnoteUrlRegex = new Regex(@"\[(" + ValidUrlPattern + @"([^\]\s]+))\]", RegexOptions.Singleline);
// "Foo (disambiguation)"
private static readonly Regex CityNameWithDisambiguationRegex = new Regex(@"([^\(]+) \([^\)]+\)", RegexOptions.Singleline);
// "blah, CA 99999", "blah, CA 99999-1234", "blah CA", etc
private static readonly Regex AddressWithStateOrZipRegex = new Regex(@"(.+)[\.,\-]+\s*(AL|alabama|AK|alaska|AZ|arizona|AR|arkansas|CA|california|CO|colorado|CT|connecticut|DC|DE|delaware|FL|florida|GA|georgia|HI|hawaii|ID|idaho|IL|illinois|IN|indiana|IA|iowa|KS|kansas|KY|kentucky|LA|louisiana|ME|maine|MD|maryland|MA|massachusetts|MI|michigan|MN|minnesota|MS|mississippi|MO|missouri|MT|montana|NE|nebraska|NV|nevada|NH|new hampshire|NJ|new jersey|NM|new mexico|NY|new york|NC|north carolina|ND|north dakota|OH|ohio|OK|oklahoma|OR|oregon|PA|pennsylvania|RI|rhode island|SC|south carolina|SD|south dakota|TN|tennessee|TX|texas|UT|utah|VT|vermont|VA|virginia|WA|washington|WV|west virginia|WI|wisconsin|WY|wyoming)([\s,\-]*[0-9]{5}(\-[0-9]{4})?)?$", RegexOptions.Singleline | RegexOptions.IgnoreCase);
// "* ", "** ", etc
private static readonly Regex EmptyListItemRegex = new Regex(@"^\*+\s*\n", RegexOptions.Multiline);
// "Some Name [http://www.example.com]" - groups: 1/5 = surrounding quote marks, 2 = name, 6 = URL
private static readonly Regex FootnoteToFrontLinkRegex = new Regex(@"('*)((?!The )\p{Lu}[\w\-'/]*[\w]( (and|del|de|of|&|the|la|le|for|\p{Lu}[\w\-'/]*[\w]))*)('*)[, ]*\[(http[^ ]+)( )*\]");

// ---- time values ---------------------------------------------------------

// "10.30 am" - groups: 1 = hour, 2 = minutes, 3 = spacing, 4 = am/pm suffix
private static readonly Regex TimeValuesShouldUseColonAsSeperator = new Regex(@"\b([1-9]|10|11|12)\.([0-5][0-9])([ ]*)((a|p)\.?\s*m\.|(a|p)\.?\s*m\b)", RegexOptions.IgnoreCase);
private static readonly Regex TimeValuesSuffixAM = new Regex(@"\b([1-9]|10|11|12)(:[0-5][0-9])*[ ]*(a\.\s*m\.|a\s*m\b)", RegexOptions.IgnoreCase);
private static readonly Regex TimeValuesSuffixPM = new Regex(@"\b([1-9]|10|11|12)(:[0-5][0-9])*[ ]*(p\.\s*m\.|p\s*m\b)", RegexOptions.IgnoreCase);
private static readonly Regex TimeValuesRoundToHour = new Regex(@"\b([1-9]|10|11|12):00(AM|PM)\b", RegexOptions.IgnoreCase);
private static readonly Regex TimeValuesHourToNoon = new Regex(@"\b(12PM)\b", RegexOptions.IgnoreCase);
private static readonly Regex TimeValuesHourToMidnight = new Regex(@"\b(12AM)\b", RegexOptions.IgnoreCase);
// "9AM - 5PM" - groups: 1 = first time, 6 = the dash, 7 = second time
private static readonly Regex TimeValuesTrimWhitespace = new Regex(@"\b((([1-9]|10|11|12)(:[0-5][0-9])*(AM|PM))|noon|midnight)\s*(\-)\s*((([1-9]|10|11|12)(:[0-5][0-9])*(AM|PM))|noon|midnight)\b", RegexOptions.IgnoreCase);

// ---- external links that should be internal ------------------------------

// the dots in the host names are escaped so that they only match a literal "."
private static readonly Regex ExternalToInternalLinkWikipedia = new Regex(@"\[http[s]?://en\.wikipedia\.org/wiki/([^\] ]+)[ ]+([^\]]+)]", RegexOptions.IgnoreCase);
private static readonly Regex ExternalToInternalLinkWikipediaNoText = new Regex(@"\[http[s]?://en\.wikipedia\.org/wiki/([^\] ]+)[ ]*]", RegexOptions.IgnoreCase);
private static readonly Regex ExternalToInternalLinkWikivoyage = new Regex(@"\[http[s]?://en\.wikivoyage\.org/wiki/([^\] ]+)[ ]+([^\]]+)]", RegexOptions.IgnoreCase);
private static readonly Regex ExternalToInternalLinkWikivoyageNoText = new Regex(@"\[http[s]?://en\.wikivoyage\.org/wiki/([^\] ]+)[ ]*]", RegexOptions.IgnoreCase);

// ---- plain text list items that look like listings -----------------------

// "* blah blah blah", "** blah blah blah", etc
private static readonly Regex CandidateListingItemRegex = new Regex(@"^\*+\s*([^\{]{2}.+)$", RegexOptions.Multiline);
// "'''Name'''" or "The '''Name'''" - group 2 is the name
private static readonly Regex ListingNameRegex = new Regex(@"(the\s+)*'''(.{3,}?)'''", RegexOptions.IgnoreCase);
// "[http://www.example.com/ Example Text]"
private static readonly Regex ExternalLinkWithTextRegex = new Regex(@"\[(http[^\]\s]+)\s+([^\]]+)\]", RegexOptions.IgnoreCase);
// "123-456-7890"
private static readonly string ListingPhoneNumber = @"((''|\+|\()*\d+(''|\)*)[\s\-]+)+(''|\+|\()*\d+(''|\)*)( ext\.? \d+)?";
private static readonly Regex ListingPhoneNumberRegex = new Regex(ListingPhoneNumber, RegexOptions.IgnoreCase);
// "fax: 123-456-7890" - group 4 is the number
private static readonly string ListingFaxNumber = @"(\(''|''\(|\()?fax(:)?(\s)*(" + ListingPhoneNumber + @")(''\)|\)''|\))?";
private static readonly Regex ListingFaxNumberRegex = new Regex(ListingFaxNumber, RegexOptions.IgnoreCase);
// "telephone: 123-456-7890" and similar - group 4 is the number.
// "call:" and "telephone:" are separate alternatives; when they were fused into
// "call:telephone:" the text "telephone: 123..." only matched from "phone:" onwards.
private static readonly string ListingPhoneNumberWithLabel = @"(\(''|''\(|\()?(\u260e|call:|call|telephone:|telephone|tel:|tel\.:|tel\.|tel|phone:|phone|ph:|ph\.:|ph\.|ph|\u260E)(\s)*(" + ListingPhoneNumber + @")(''\)|\)''|\))?";
private static readonly Regex ListingPhoneNumberWithLabelRegex = new Regex(ListingPhoneNumberWithLabel, RegexOptions.IgnoreCase);
// "toll-free: 123-456-7890" and similar - group 5 is the number
private static readonly string ListingTollfreeNumber = @"(\(''|''\(|\()?(toll[ \-]?free)(:)?(\s)*(" + ListingPhoneNumber + @")(''\)|\)''|\))?";
private static readonly Regex ListingTollfreeNumberRegex = new Regex(ListingTollfreeNumber, RegexOptions.IgnoreCase);

// ---- punctuation clean-up ------------------------------------------------

private static readonly string InvalidLeadingOrTrailingPunctuation = @",|\-|\*|\:|\–|;";
private static readonly string InvalidLeadingPunctuation = @"\.|!|\?|\)|\]|\}|—|;";
private static readonly string InvalidTrailingPunctuation = @"\(|\[|\{|;";
private static readonly string InvalidDuplicatePunctuation = @"\.|!|\?|" + InvalidLeadingOrTrailingPunctuation;
private static readonly Regex InvalidLeadingPunctuationRegex = new Regex(@"^(\s|" + InvalidLeadingPunctuation + "|" + InvalidLeadingOrTrailingPunctuation + @")+", RegexOptions.Multiline);
private static readonly Regex InvalidTrailingPunctuationRegex = new Regex(@"(\s|" + InvalidTrailingPunctuation + "|" + InvalidLeadingOrTrailingPunctuation + @")+$", RegexOptions.Multiline);
private static readonly Regex InvalidDuplicatePunctuationRegex = new Regex(@"(" + InvalidDuplicatePunctuation + @"|\s)+(" + InvalidDuplicatePunctuation + @")");

// ---- e-mail and street addresses -----------------------------------------

// "email: user@example.com" OR "mailto:user@example.com" OR "user@example.com" - group 2 is the address
private static readonly string ListingEmail = @"(mailto:|e-mail:|email:)?\s*(\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b)";
private static readonly Regex ListingEmailRegex = new Regex(ListingEmail, RegexOptions.IgnoreCase);
// "1234 First St" - group 1 is the address without trailing punctuation
private static readonly string ListingAddress = @"([\d]+([/\-][\d]+)? ([nesw]\.? )?([\p{L}\d]+[ \.\-]*){1,3} (avenue|ave|av|boulevard|blvd|court|ct|drive|dr|expressway|expwy|freeway|fwy|highway( \d{1,3})?|hwy( \d{1,3})?|lane|ln|loop|parkway|pkwy|place|pl|road|rd|row|street|st|way)\b(\.? (#(\s)*[\d]+|north|ne|nw|n|east|e|south|se|sw|s|west|w)\b)?)[\. , ]*";
private static readonly Regex ListingAddressRegex = new Regex(ListingAddress, RegexOptions.IgnoreCase);
// the above pattern will match things like "25 km by road", so add a pattern to catch those
private static readonly string ListingAddressFalsePositives = @"\b(km|kilometer|kilometers|mi|mile|miles)\b";
private static readonly Regex ListingAddressFalsePositivesRegex = new Regex(ListingAddressFalsePositives, RegexOptions.IgnoreCase);
// "Calle Ricardo Montalban, 452"
// NOTE: unlike ListingAddress there is no outer capture group here, so group 1 is
// only the street-type word; the whole match is the address.
private static readonly string ListingAddressInternational = @"(avenida|ave|av|calle|estrada|est|rua)(\.)? ([\p{L}\d]+[ \.\-]*){1,3}, [\d]+([/\-][\d]+)?";
private static readonly Regex ListingAddressInternationalRegex = new Regex(ListingAddressInternational, RegexOptions.IgnoreCase);
// NOTE(review): the empty alternative after "on|" makes this match any line that
// starts with whitespace - confirm whether that is intended.
private static readonly Regex ListingAddressIsDirectionsRegex = new Regex(@"^(between|corner|end|next|on|)\s", RegexOptions.Multiline);

// ---- article status templates --------------------------------------------

private static readonly Regex CityStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlinecity,usablecity,guidecity,starcity".Split(',')));
private static readonly Regex CountryStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlinecountry,usablecountry,guidecountry,starcountry".Split(',')));
private static readonly Regex DiveguideStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlinediveguide,usablediveguide,guidediveguide,stardiveguide".Split(',')));
private static readonly Regex DistrictStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlinedistrict,usabledistrict,guidedistrict,stardistrict".Split(',')));
private static readonly Regex ItineraryStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlineitinerary,usableitinerary,guideitinerary,staritinerary".Split(',')));
private static readonly Regex ParkStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlinepark,usablepark,guidepark,starpark".Split(',')));
private static readonly Regex PhrasebookStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlinephrasebook,usablephrasebook,guidephrasebook,starphrasebook".Split(',')));
private static readonly Regex RegionStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlineregion,usableregion,guideregion,starregion".Split(',')));
private static readonly Regex TopicStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlinetopic,usabletopic,guidetopic,startopic".Split(',')));

// ---- miscellaneous ---------------------------------------------------------

// "| alt =" - an alt parameter is present in the template call, even if empty
private static readonly Regex ListingAltParamRegex = new Regex(@"\|\s*alt\s*=", RegexOptions.Singleline);
// "()", "[]", "{}"
private static readonly Regex EmptyPunctuationRegex = new Regex(@"(\(\s*\)|\[\s*\]|\{\s*\})");
private static readonly Regex OnlyPunctuationRegex = new Regex(@"^['\-\(\)\[\]\{\}\*\.\?!\s]+$", RegexOptions.Singleline);
// match a single digit
private static readonly Regex DigitRegex = new Regex(@"\d");
private static readonly Regex DoubleSpaceRegex = new Regex(@"\s{2,}");

// ---- headings ----------------------------------------------------------------

private static readonly Regex InvalidEmptySecondLevelCityHeading = new Regex(@"(cope|learn|respect|stay safe|stay healthy|talk|work)", RegexOptions.IgnoreCase);
private static readonly Regex InvalidEmptySecondLevelRegionHeading = new Regex(@"(talk|regions)", RegexOptions.IgnoreCase);
private static readonly Regex InvalidEmptyThirdLevelCityHeading = new Regex(@"(by(\s+\w)+)", RegexOptions.IgnoreCase);
private static readonly Regex InvalidEmptyThirdLevelRegionHeading = new Regex(@"(itineraries)", RegexOptions.IgnoreCase);
// non-standard second-level heading titles (lower case, whole title) and their replacements
private static readonly Dictionary<Regex, string> InvalidSecondLevelHeadings = new Dictionary<Regex, string>
{
    {new Regex(@"^(know|information)$", RegexOptions.IgnoreCase), "Understand"},
    {new Regex(@"^(get in|getting in|getting there)$", RegexOptions.IgnoreCase), "Get in"},
    {new Regex(@"^(get around|getting around)$", RegexOptions.IgnoreCase), "Get around"},
    {new Regex(@"^(sights)$", RegexOptions.IgnoreCase), "See"},
    {new Regex(@"^(activities)$", RegexOptions.IgnoreCase), "Do"},
    {new Regex(@"^(shopping|shops|shop)$", RegexOptions.IgnoreCase), "Buy"},
    {new Regex(@"^(restaurants|dining)$", RegexOptions.IgnoreCase), "Eat"},
    {new Regex(@"^(bars|nightlife)$", RegexOptions.IgnoreCase), "Drink"},
    {new Regex(@"^(accommodation|hotels|stay)$", RegexOptions.IgnoreCase), "Sleep"},
    {new Regex(@"^(stay healthy)$", RegexOptions.IgnoreCase), "Stay healthy"},
    {new Regex(@"^(stay safe|safety)$", RegexOptions.IgnoreCase), "Stay safe"},
    {new Regex(@"^(go next|get out|nearby)$", RegexOptions.IgnoreCase), "Go next"}
};
private static readonly Regex MidrangeHeadingRegex = new Regex(@"^(mid[ \-]*range|moderate)", RegexOptions.IgnoreCase);
private static readonly Regex ByOnHeadingRegex = new Regex(@"^(by|on) (.+)", RegexOptions.IgnoreCase);

// ---- symbols and stray URLs --------------------------------------------------

// typographic symbols and the plain wiki text that replaces them
private static readonly Dictionary<Regex, string> InvalidSymbols = new Dictionary<Regex, string>
{
    {new Regex(@"^(•)", RegexOptions.Multiline), "*"},
    {new Regex(@"(“|”)"), "\""},
    {new Regex(@"(’|‘)"), "'"},
    {new Regex(@"…"), "..."},
    {new Regex(@"(®|©|™)"), ""}
};
// "www.example.com" that is not already preceded by "/" - group 1 is the preceding character
private static readonly Regex NoHttpUrlRegex = new Regex(@"([^/])(www\.[a-z0-9\-]+\.[a-z0-9\-]+)", RegexOptions.IgnoreCase);
// "-123.1234567", "-123.1234567890" - group 1 keeps at most eight decimal places
private static readonly Regex LatLongTrimRegex = new Regex(@"((\-)?[0-9]{1,3}\.[0-9]{8})([0-9]+)");

// Entry point: runs every clean-up pass over the article text in a fixed order.
//   articleText   - the wiki text to process
//   articleTitle  - the page title, handed to each pass
//   wikiNamespace - the page namespace, handed to each pass
//   summary       - receives the accumulated edit summary
//   skip          - set to true when no pass changed the text
// Returns the processed wiki text.
public string ProcessArticle(string articleText, string articleTitle, int wikiNamespace, out string summary, out bool skip)
{
    summary = "";
    skip = false;
    string unmodifiedText = articleText;

    articleText = ReplaceInvalidSymbols(articleText, articleTitle, wikiNamespace, ref summary, ref skip);
    articleText = FixInvalidUrls(articleText, articleTitle, wikiNamespace, ref summary, ref skip);
    articleText = UpdateHeadings(articleText, articleTitle, wikiNamespace, ref summary, ref skip);
    articleText = RemoveEmptyListings(articleText, articleTitle, wikiNamespace, ref summary, ref skip);
    articleText = RemoveEmptyObsoleteHeadings(articleText, articleTitle, wikiNamespace, ref summary, ref skip);
    articleText = ExternalToInternalLink(articleText, articleTitle, wikiNamespace, ref summary, ref skip);
    articleText = FootnoteToFrontlink(articleText, articleTitle, wikiNamespace, ref summary, ref skip);
    articleText = FormatTimeValues(articleText, articleTitle, wikiNamespace, ref summary, ref skip);
    if (AGGRESSIVE)
    {
        // plain-text-to-listing conversion is only attempted in aggressive mode
        articleText = ConvertTextToListings(articleText, articleTitle, wikiNamespace, ref summary, ref skip);
    }
    articleText = FormatListings(articleText, articleTitle, wikiNamespace, ref summary, ref skip);

    // nothing changed at all, so there is no edit to save
    if (string.Equals(articleText, unmodifiedText))
    {
        skip = true;
    }
    return articleText;
}

// replace symbols with the appropriate wiki text equivalent
// (applies every pattern/replacement pair in InvalidSymbols; no edit summary is added)
private string ReplaceInvalidSymbols(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip)
{
    foreach (KeyValuePair<Regex, string> symbolRule in InvalidSymbols)
    {
        Regex symbolPattern = symbolRule.Key;
        articleText = symbolPattern.Replace(articleText, symbolRule.Value);
    }
    return articleText;
}

// replace URLs of the form "www.example.com" with "http://www.example.com"
private string FixInvalidUrls(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip)
{
    // $1 is the character in front of the URL, $2 the URL itself
    string fixedText = NoHttpUrlRegex.Replace(articleText, "$1http://$2");
    if (!fixedText.Equals(articleText))
    {
        summary = UpdateEditSummary(summary, "fix URL(s) missing 'http'");
    }
    return fixedText;
}

// ensure that headings match the article templates
// Second-level headings are mapped through InvalidSecondLevelHeadings; headings of
// any other level are checked against the "mid-range" and "by/on ..." patterns.
private string UpdateHeadings(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip)
{
    foreach (Match m in WikiRegexes.Headings.Matches(articleText))
    {
        string originalHeading = m.Value;
        string originalTitle = m.Groups[1].Value;
        // compare against the trimmed title so that padding inside the heading
        // ("== Get in ==") is not mistaken for a title change
        string trimmedTitle = originalTitle.Trim();
        string lowerTitle = trimmedTitle.ToLower();
        string newTitle = trimmedTitle;

        // count the leading '=' characters; the bounds check guards against a
        // heading that consists of '=' only
        int headingLevel = 1;
        while (headingLevel < originalHeading.Length && originalHeading[headingLevel] == '=')
        {
            headingLevel++;
        }

        Match match = null;
        if (headingLevel == 2)
        {
            foreach (KeyValuePair<Regex, string> invalidHeadingEntry in InvalidSecondLevelHeadings)
            {
                if (MatchText(lowerTitle, invalidHeadingEntry.Key, ref match))
                {
                    newTitle = invalidHeadingEntry.Value;
                    // the patterns are anchored whole-title alternatives, so at most one can match
                    break;
                }
            }
        }
        else if (MatchText(lowerTitle, MidrangeHeadingRegex, ref match))
        {
            newTitle = "Mid-range";
        }
        else if (MatchText(lowerTitle, ByOnHeadingRegex, ref match))
        {
            // "By Car" -> "By car"
            newTitle = Capitalize(match.Groups[1].Value) + " " + match.Groups[2].Value.ToLower();
        }

        if (!newTitle.Equals(trimmedTitle))
        {
            string headingBars = originalHeading.Substring(0, headingLevel);
            articleText = articleText.Replace(originalHeading, headingBars + newTitle + headingBars);
            summary = UpdateEditSummary(summary, "'" + trimmedTitle + "' → '" + newTitle + "' per [[WV:AT]]");
        }
    }
    return articleText;
}

// Remove any listing templates in which all fields are empty
private string RemoveEmptyListings(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip)
{
    foreach (Match listingMatch in ListingTemplateNamesRegex.Matches(articleText))
    {
        string listing = listingMatch.Value;

        // a single non-empty parameter is enough to keep the listing
        bool hasValue = false;
        foreach (KeyValuePair<string, string> field in Tools.GetTemplateParameterValues(listing))
        {
            if (field.Value != "")
            {
                hasValue = true;
                break;
            }
        }
        if (hasValue)
        {
            continue;
        }

        articleText = articleText.Replace(listing, "");
        summary = UpdateEditSummary(summary, "empty listing(s) removed");
    }

    // strip off any list items that are now empty as a result
    return EmptyListItemRegex.Replace(articleText, "");
}

// remove obsolete headings if they have no content
// Only region, city and district articles are processed. Region rules are applied
// to region articles and city rules to city/district articles, at both heading levels.
private string RemoveEmptyObsoleteHeadings(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip)
{
    bool regionArticle = IsRegionArticle(articleText);
    bool cityArticle = IsCityArticle(articleText) || IsDistrictArticle(articleText);
    if (!regionArticle && !cityArticle)
    {
        return articleText;
    }

    // comma-separated lists of the removed heading names, used for the edit summary
    string emptyRegionHeadings = "";
    string emptyCityHeadings = "";

    foreach (KeyValuePair<string, string> levelTwoSectionData in SplitToSecondLevelSections(articleText))
    {
        string levelTwoSectionName = levelTwoSectionData.Key;
        string levelTwoSectionText = levelTwoSectionData.Value;

        if (regionArticle
            && InvalidEmptySecondLevelRegionHeading.IsMatch(levelTwoSectionName)
            && IsEmptySecondLevelSection(levelTwoSectionText))
        {
            articleText = articleText.Replace(levelTwoSectionText, "");
            emptyRegionHeadings = AppendCSV(emptyRegionHeadings, "'" + levelTwoSectionName + "'");
            continue;
        }
        if (cityArticle
            && InvalidEmptySecondLevelCityHeading.IsMatch(levelTwoSectionName)
            && IsEmptySecondLevelSection(levelTwoSectionText))
        {
            articleText = articleText.Replace(levelTwoSectionText, "");
            emptyCityHeadings = AppendCSV(emptyCityHeadings, "'" + levelTwoSectionName + "'");
            continue;
        }

        foreach (KeyValuePair<string, string> levelThreeSectionData in SplitToThirdLevelSections(levelTwoSectionText))
        {
            string levelThreeSectionName = levelThreeSectionData.Key.Trim();
            string levelThreeSectionText = levelThreeSectionData.Value;

            // the region rule is guarded by regionArticle like every other rule here,
            // so a city article is never edited "per the region article template"
            if (regionArticle
                && InvalidEmptyThirdLevelRegionHeading.IsMatch(levelThreeSectionName)
                && IsEmptyThirdLevelSection(levelThreeSectionText))
            {
                articleText = articleText.Replace(levelThreeSectionText, "");
                emptyRegionHeadings = AppendCSV(emptyRegionHeadings, "'" + levelThreeSectionName + "'");
                continue;
            }
            if (cityArticle
                && InvalidEmptyThirdLevelCityHeading.IsMatch(levelThreeSectionName)
                && IsEmptyThirdLevelSection(levelThreeSectionText))
            {
                articleText = articleText.Replace(levelThreeSectionText, "");
                emptyCityHeadings = AppendCSV(emptyCityHeadings, "'" + levelThreeSectionName + "'");
            }
        }
    }

    if (!String.IsNullOrEmpty(emptyRegionHeadings))
    {
        summary = UpdateEditSummary(summary, "remove empty " + emptyRegionHeadings + " heading(s) per [[WV:Region article template]]");
    }
    if (!String.IsNullOrEmpty(emptyCityHeadings))
    {
        summary = UpdateEditSummary(summary, "remove empty " + emptyCityHeadings + " heading(s) per [[WV:Huge city article template]]");
    }
    return articleText;
}

// convert footnote links to frontlinks
// e.g. "Some Name [http://www.example.com]" becomes "[http://www.example.com Some Name]"
private string FootnoteToFrontlink(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip)
{
    // $1/$5 = surrounding quote marks, $2 = name, $6 = URL
    string convertedText = FootnoteToFrontLinkRegex.Replace(articleText, "$1[$6 $2]$5");
    if (!convertedText.Equals(articleText))
    {
        summary = UpdateEditSummary(summary, "footnote → frontlink per [[WV:XL]]");
    }
    return convertedText;
}

// convert external links to interwiki/internal links
private string ExternalToInternalLink(string
articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip)
{
    // Wikipedia links become "w:" interwiki links, Wikivoyage links become plain internal links
    string rewritten = ExternalToInternalLinkWikipedia.Replace(articleText, "[[w:$1|$2]]");
    rewritten = ExternalToInternalLinkWikipediaNoText.Replace(rewritten, "[[w:$1]]");
    rewritten = ExternalToInternalLinkWikivoyage.Replace(rewritten, "[[$1|$2]]");
    rewritten = ExternalToInternalLinkWikivoyageNoText.Replace(rewritten, "[[$1]]");

    if (!rewritten.Equals(articleText))
    {
        summary = UpdateEditSummary(summary, "external → internal link(s)");
    }
    return rewritten;
}

// normalise time values ("10.30 a.m." -> "10:30AM", "12PM" -> "noon", "9AM - 5PM" -> "9AM-5PM")
private string FormatTimeValues(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip)
{
    // the order matters: each step relies on the output format of the previous one
    string formatted = TimeValuesShouldUseColonAsSeperator.Replace(articleText, "$1:$2$3$4");
    formatted = TimeValuesSuffixAM.Replace(formatted, "$1$2AM");
    formatted = TimeValuesSuffixPM.Replace(formatted, "$1$2PM");
    formatted = TimeValuesRoundToHour.Replace(formatted, "$1$2");
    formatted = TimeValuesHourToNoon.Replace(formatted, "noon");
    formatted = TimeValuesHourToMidnight.Replace(formatted, "midnight");
    formatted = TimeValuesTrimWhitespace.Replace(formatted, "$1$6$7");

    if (!formatted.Equals(articleText))
    {
        summary = UpdateEditSummary(summary, "update time(s) per [[WV:TDF]]");
    }
    return formatted;
}

// convert plain text list items that start with a bold name into listing templates
private string ConvertTextToListings(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip)
{
    // do not try to convert text to listings for non-city or park articles
    if (!(IsCityArticle(articleText) || IsDistrictArticle(articleText) || IsParkArticle(articleText)))
    {
        return articleText;
    }

    int convertedCount = 0;
    foreach (KeyValuePair<string, string> section in SplitToSecondLevelSections(articleText))
    {
        // only convert text that's in a section that supports non-generic listings
        if (!TextToListingSectionNames.IsMatch(section.Key))
        {
            continue;
        }

        string listingType = GetValidListingTypeForSection(section.Key);

        // look at every list item within the section
        foreach (Match candidate in CandidateListingItemRegex.Matches(section.Value))
        {
            string itemText = candidate.Groups[1].Value;
            string listing = ConvertListingItemtoTemplatedListing(itemText, listingType, ref summary);
            if (listing == "")
            {
                continue;
            }
            articleText = articleText.Replace(itemText, listing);
            convertedCount++;
        }
    }

    if (convertedCount > 0)
    {
        string plural = convertedCount > 1 ? "s" : "";
        summary = UpdateEditSummary(summary, "convert " + convertedCount + " plain text listing" + plural + " to [[WV:Listings|templated listing" + plural + "]]");
    }
    return articleText;
}

// build a "{{type|...}}" template call from one list item; returns "" when the
// item does not start with a listing name
private string ConvertListingItemtoTemplatedListing(string listItemText, string listingType, ref string summary)
{
    string listing = "{{" + listingType + "}}";

    // if we don't have a listing name don't bother trying to convert anything else
    bool hasName = ProcessListingNameInListItem(ref listItemText, ref listing);
    if (!hasName)
    {
        return "";
    }

    // each step moves one kind of value out of the item text and into the template
    ProcessListingPhoneInListItem(ref listItemText, ref listing);
    ProcessListingEmailInListItem(ref listItemText, ref listing);
    ProcessListingUrlInListItem(ref listItemText, ref listing);
    ProcessListingAddressInListItem(ref listItemText, ref listing);
    ProcessListingContentInListItem(ref listItemText, ref listing);
    return listing;
}

// move a leading '''name''' (optionally front-linked) into the "name"/"url" parameters;
// returns false when the item does not start with a name
private bool ProcessListingNameInListItem(ref string listItemText, ref string templateCall)
{
    Match nameMatch = ListingNameRegex.Match(listItemText);
    // if there isn't a name at the beginning of the listing, don't convert
    if (!(nameMatch.Success && nameMatch.Index == 0))
    {
        return false;
    }

    string listingName = nameMatch.Groups[2].Value.Trim();
    templateCall = Tools.SetTemplateParameterValue(templateCall, "name", listingName);

    // see if the name is front-linked; if so split it into the name & url fields
    Match frontLink = ExternalLinkWithTextRegex.Match(listingName);
    if (frontLink.Success && frontLink.Index == 0)
    {
        templateCall = Tools.SetTemplateParameterValue(templateCall, "url", frontLink.Groups[1].Value);
        listingName = frontLink.Groups[2].Value;
        templateCall = Tools.SetTemplateParameterValue(templateCall, "name", listingName);
    }

    // remove name from list item text
    listItemText = RemoveValueFromListItemText(listItemText, nameMatch.Value);
    return true;
}

// move labelled phone, fax and toll-free numbers (and, failing a labelled phone,
// the first unlabelled number) out of the item text and into the template
private void ProcessListingPhoneInListItem(ref string listItemText, ref string templateCall)
{
    Match labelledPhone = ListingPhoneNumberWithLabelRegex.Match(listItemText);
    if (labelledPhone.Success)
    {
        templateCall = Tools.SetTemplateParameterValue(templateCall, "phone", labelledPhone.Groups[4].Value);
        listItemText = RemoveValueFromListItemText(listItemText, labelledPhone.Value);
    }

    Match fax = ListingFaxNumberRegex.Match(listItemText);
    if (fax.Success)
    {
        templateCall = Tools.SetTemplateParameterValue(templateCall, "fax", fax.Groups[4].Value);
        listItemText = RemoveValueFromListItemText(listItemText, fax.Value);
    }

    Match tollfree = ListingTollfreeNumberRegex.Match(listItemText);
    if (tollfree.Success)
    {
        templateCall = Tools.SetTemplateParameterValue(templateCall, "tollfree", tollfree.Groups[5].Value);
        listItemText = RemoveValueFromListItemText(listItemText, tollfree.Value);
    }

    if (labelledPhone.Success)
    {
        return;
    }

    // try to find a phone number without a label
    Match barePhone = ListingPhoneNumberRegex.Match(listItemText);
    if (!barePhone.Success)
    {
        return;
    }
    // only consider a phone number valid if it contains more than six digits
    if (DigitRegex.Matches(barePhone.Value).Count > 6)
    {
        templateCall = Tools.SetTemplateParameterValue(templateCall, "phone", barePhone.Value);
        listItemText = RemoveValueFromListItemText(listItemText, barePhone.Value);
    }
}

private void ProcessListingEmailInListItem(ref string listItemText, ref string templateCall)
{
    Match emailMatch =
ListingEmailRegex.Match(listItemText);
    if (emailMatch.Success)
    {
        // group 2 is the address without any "mailto:"/"email:" label
        string email = emailMatch.Groups[2].Value;
        templateCall = Tools.SetTemplateParameterValue(templateCall, "email", email);
        listItemText = RemoveValueFromListItemText(listItemText, emailMatch.Value);
    }
}

// move a footnote-style "[http://www.example.com]" link into the "url" parameter
private void ProcessListingUrlInListItem(ref string listItemText, ref string templateCall)
{
    // url was already set when processing listing name
    if (Tools.GetTemplateParameterValue(templateCall, "url") != "")
    {
        return;
    }

    Match footnote = FootnoteUrlRegex.Match(listItemText);
    if (!footnote.Success)
    {
        return;
    }
    templateCall = Tools.SetTemplateParameterValue(templateCall, "url", footnote.Groups[1].Value);
    listItemText = RemoveValueFromListItemText(listItemText, footnote.Value);
}

// move a street address ("1234 First St" or "Calle Ricardo Montalban, 452") into
// the "address" parameter
private void ProcessListingAddressInListItem(ref string listItemText, ref string templateCall)
{
    Match addressMatch = ListingAddressRegex.Match(listItemText);
    if (addressMatch.Success)
    {
        // group 1 is the address without the trailing punctuation the pattern also consumes
        string address = addressMatch.Groups[1].Value;
        // things like "25 km by road" look like an address but are not one
        if (!ListingAddressFalsePositivesRegex.IsMatch(address))
        {
            templateCall = Tools.SetTemplateParameterValue(templateCall, "address", address);
            listItemText = RemoveValueFromListItemText(listItemText, addressMatch.Value);
        }
        return;
    }

    addressMatch = ListingAddressInternationalRegex.Match(listItemText);
    if (addressMatch.Success)
    {
        // The international pattern has no outer capture group: its group 1 is only
        // the street-type word ("Calle"). Use the whole match so the full address is
        // stored instead of being deleted from the text and lost.
        string address = addressMatch.Value;
        templateCall = Tools.SetTemplateParameterValue(templateCall, "address", address);
        listItemText = RemoveValueFromListItemText(listItemText, addressMatch.Value);
    }
}

// whatever text is left after the other fields were extracted becomes the "content" parameter
private void ProcessListingContentInListItem(ref string listItemText, ref string templateCall)
{
    if (String.IsNullOrEmpty(listItemText))
    {
        return;
    }
    listItemText = SanitizeListingContent(listItemText);
    templateCall = Tools.SetTemplateParameterValue(templateCall, "content", listItemText);
}

// remove an extracted value from the list item text and tidy up the punctuation left behind
private string RemoveValueFromListItemText(string listItemText, string value)
{
    string remaining = listItemText.Replace(value, "").Trim();
    remaining = InvalidDuplicatePunctuationRegex.Replace(remaining, "$1");
    return StripLeadingPunctuation(remaining).Trim();
}

// Perform various tasks on listings to ensure params and other values
// are formatted correctly.
private string FormatListings(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip)
{
    // loop through all sections
    foreach (KeyValuePair<string, string> section in SplitToSecondLevelSections(articleText))
    {
        string sectionName = section.Key;

        // get all listing tags within the section
        foreach (Match listingMatch in ListingTemplateNamesRegex.Matches(section.Value))
        {
            string originalListing = listingMatch.Value;
            string listing = originalListing;

            listing = ConvertGenericListingToSpecificType(listing, ref summary);
            listing = MatchListingTypeToSection(listing, sectionName, ref summary);
            listing = FormatListingParams(listing, ref summary);
            listing = SanitizeListingPhoneNumbers(listing, ref summary);
            listing = SanitizeListingEmail(listing, ref summary);
            listing = SanitizeListingLatLong(listing, ref summary);
            listing = SanitizeListingUrl(listing, ref summary);
            listing = SanitizeListingContent(listing, ref summary);
            if (AGGRESSIVE)
            {
                // address rewriting is only attempted in aggressive mode
                listing = SanitizeListingAddress(listing, articleTitle, ref summary);
            }

            if (!listing.Equals(originalListing))
            {
                articleText = articleText.Replace(originalListing, listing);
            }
        }
    }
    return articleText;
}

// change "{{listing|type=xyz|...}}" to "{{xyz|...}}"
private string ConvertGenericListingToSpecificType(string templateCall, ref string summary)
{
    string originalTemplateCall = templateCall;
    string listingType = Tools.GetTemplateName(templateCall);
    if (listingType.Equals("listing"))
    {
        string templateType = Tools.GetTemplateParameterValue(templateCall, "type").ToLower();
        if
(templateType.Equals("see") || templateType.Equals("do") || templateType.Equals("buy") || templateType.Equals("eat") || templateType.Equals("drink") || templateType.Equals("sleep"))
        {
            templateCall = Tools.RenameTemplate(templateCall, templateType);
            templateCall = Tools.RemoveTemplateParameter(templateCall, "type");
        }
    }
    if (!templateCall.Equals(originalTemplateCall))
    {
        summary = UpdateEditSummary(summary, "update listing type to match expected section type");
    }
    return templateCall;
}

// make sure listings are formatted according to the style guidelines in
// Wikivoyage:Listings
// The known parameters are written first, in ListingTemplateParamNames order, then
// any other non-empty parameters on their own lines, then "content" on the last line.
// No edit summary is added here: it would be applied to any article with listings
// whether they are updated or not.
private string FormatListingParams(string templateCall, ref string summary)
{
    string listingType = Tools.GetTemplateName(templateCall);
    bool isSleepListing = listingType.Equals("sleep");
    System.Text.StringBuilder formatted = new System.Text.StringBuilder("{{" + listingType + "\n");

    // loop through expected template arguments and format appropriately
    foreach (string param in ListingTemplateParamNames)
    {
        if (param.Equals("content"))
        {
            // content is always written last
            continue;
        }

        string paramValue = Tools.GetTemplateParameterValue(templateCall, param);
        bool isEmpty = paramValue == "";

        if (param.Equals("type") && (isEmpty || !listingType.Equals("listing")))
        {
            // only listing uses the "type" attribute
            if (isEmpty)
            {
                templateCall = Tools.RemoveTemplateParameter(templateCall, param);
            }
            continue;
        }
        if ((param.Equals("image") || param.Equals("lastedit")) && isEmpty)
        {
            // empty image & lastedit attributes are unnecessary
            continue;
        }
        if (isSleepListing && param.Equals("hours") && isEmpty)
        {
            // sleep listings don't use the "hours" attribute
            templateCall = Tools.RemoveTemplateParameter(templateCall, param);
            continue;
        }
        if (!isSleepListing && (param.Equals("checkin") || param.Equals("checkout")))
        {
            // only sleep listings use the "checkin" and "checkout" attributes; a
            // non-empty value is left in place and written with the unexpected args below
            if (isEmpty)
            {
                templateCall = Tools.RemoveTemplateParameter(templateCall, param);
            }
            continue;
        }
        if (param.Equals("alt") && isEmpty && !ListingAltParamRegex.IsMatch(templateCall))
        {
            // do not add an alt tag if it isn't already present
            continue;
        }

        // replace excess whitespace with single spaces
        paramValue = DoubleSpaceRegex.Replace(paramValue, " ");
        formatted.Append("| ").Append(param).Append("=").Append(paramValue);

        // add either a newline or a space after the param value, depending on the param
        bool endsLine = param.Equals("email") || param.Equals("directions") || param.Equals("fax")
            || param.Equals("price") || param.Equals("image") || param.Equals("lastedit");
        formatted.Append(endsLine ? "\n" : " ");

        // whatever is left in templateCall afterwards is an unexpected argument
        templateCall = Tools.RemoveTemplateParameter(templateCall, param);
    }

    // loop through any unexpected template args and format on their own lines
    foreach (KeyValuePair<string, string> templateParameter in Tools.GetTemplateParameterValues(templateCall))
    {
        if (templateParameter.Key.Equals("content"))
        {
            continue;
        }
        if (templateParameter.Value == "")
        {
            // any empty, unrecognized listing params can be removed
            continue;
        }
        formatted.Append("| ").Append(templateParameter.Key).Append("=").Append(templateParameter.Value).Append("\n");
    }

    // now add template content on its own line
    formatted.Append("| content=").Append(Tools.GetTemplateParameterValue(templateCall, "content")).Append("\n");
    formatted.Append("}}");
    return formatted.ToString();
}

// update the listing phone number fields as much as possible to match
// Wikivoyage:Phone numbers
private string SanitizeListingPhoneNumbers(string templateCall, ref string summary)
{
    string sanitizedCall = templateCall;
    foreach (string phoneParam in ListingPhoneParamNames)
    {
        string number = SanitizePhoneNumber(Tools.GetTemplateParameterValue(sanitizedCall, phoneParam));
        sanitizedCall = Tools.UpdateTemplateParameterValue(sanitizedCall, phoneParam, number);
    }
    if (!sanitizedCall.Equals(templateCall))
    {
        summary = UpdateEditSummary(summary, "format listing phone # per [[WV:Phone numbers]]");
    }
    return sanitizedCall;
}

// update the listing email field to ensure it is valid
private string SanitizeListingEmail(string templateCall, ref string summary)
{
    // strip any "mailto:" prefix and surrounding whitespace
    string email = Tools.GetTemplateParameterValue(templateCall, "email");
    email = MailtoRegex.Replace(email, "").Trim();

    string sanitizedCall = Tools.UpdateTemplateParameterValue(templateCall, "email", email);
    if (!sanitizedCall.Equals(templateCall))
    {
        summary = UpdateEditSummary(summary, "fix invalid listing email");
    }
    return sanitizedCall;
}

// trim lat/long precision to no more than eight digits
private string SanitizeListingLatLong(string templateCall, ref string summary)
{
    string sanitizedCall = templateCall;
    foreach (string coordinateParam in new string[] { "lat", "long" })
    {
        // $1 keeps the sign, the integer part and the first eight decimal places
        string coordinate = Tools.GetTemplateParameterValue(sanitizedCall, coordinateParam);
        coordinate = LatLongTrimRegex.Replace(coordinate, "$1");
        sanitizedCall = Tools.UpdateTemplateParameterValue(sanitizedCall, coordinateParam, coordinate);
    }
    if (!sanitizedCall.Equals(templateCall))
    {
        summary = UpdateEditSummary(summary, "trim lat/long precision");
    }
    return sanitizedCall;
}

// update the listing URL field to ensure it is valid
private string SanitizeListingUrl(string templateCall, ref string summary)
{
    string originalTemplateCall = templateCall;
    string url = Tools.GetTemplateParameterValue(templateCall, "url");
    url = FootnoteUrlRegex.Replace(url, "$1");
    Match match = ValidUrlRegex.Match(url);
    if (match.Success && !url.ToLower().StartsWith("http://") &&
!url.ToLower().StartsWith("https://") && !url.ToLower().StartsWith("//")) { url = "http://" + url; } templateCall = Tools.UpdateTemplateParameterValue(templateCall, "url", url); if (!templateCall.Equals(originalTemplateCall)) { summary = UpdateEditSummary(summary, "fix invalid listing URL"); } return templateCall; } // make sure the listing content is capitalized, isn't just punctuation, etc. private string SanitizeListingContent(string templateCall, ref string summary) { string content = Tools.GetTemplateParameterValue(templateCall, "content"); content = SanitizeListingContent(content); return Tools.UpdateTemplateParameterValue(templateCall, "content", content); } private string SanitizeListingContent(string content) { if (String.IsNullOrEmpty(content)) { return content; } // strip empty punctuation content = EmptyPunctuationRegex.Replace(content, ""); // strip sentence fragments left over from text-to-listing conversions if (content.ToLower().StartsWith("is ")) { content = content.Substring("is ".Length); } // make sure first character is capitalized content = Capitalize(content); // if only punctuation is left, remove everything if (OnlyPunctuationRegex.IsMatch(content)) { content = ""; } return content; } // update the listing address field to ensure it is valid private string SanitizeListingAddress(string templateCall, string articleName, ref string summary) { string originalTemplateCall = templateCall; string address = Tools.GetTemplateParameterValue(templateCall, "address"); if (String.IsNullOrEmpty(address)) { return templateCall; } Match m = ListingAddressIsDirectionsRegex.Match(address); if (m.Success) { // the address field belongs in the directions field string directions = Tools.GetTemplateParameterValue(templateCall, "directions"); if (String.IsNullOrEmpty(directions)) { templateCall = Tools.UpdateTemplateParameterValue(templateCall, "directions", address); templateCall = Tools.UpdateTemplateParameterValue(templateCall, "address", ""); summary = 
UpdateEditSummary(summary, "listing address moved to directions"); return templateCall; } } // strip out state and zip code if present address = AddressWithStateOrZipRegex.Replace(address, "$1"); address = StripStrayPunctuation(address); // strip out the city if it is present string city = GetCityFromArticleName(articleName); if (address.ToLower().EndsWith(city.ToLower())) { int pos = address.ToLower().LastIndexOf(city.ToLower()); address = address.Substring(0, pos).Trim(); } address = StripStrayPunctuation(address); // properly abbreviate street name address = AbbreviateStreeType(address, "Avenue", "Ave"); address = AbbreviateStreeType(address, "Boulevard", "Blvd"); address = AbbreviateStreeType(address, "Court", "Ct"); address = AbbreviateStreeType(address, "Drive", "Dr"); address = AbbreviateStreeType(address, "Lane", "Ln"); address = AbbreviateStreeType(address, "Place", "Pl"); address = AbbreviateStreeType(address, "Road", "Rd"); address = AbbreviateStreeType(address, "Street", "St"); templateCall = Tools.UpdateTemplateParameterValue(templateCall, "address", address); if (!templateCall.Equals(originalTemplateCall)) { summary = UpdateEditSummary(summary, "format listing address per [[WV:Listings]]"); } return templateCall; } // make sure that the listing type matches the section in which the listing // is found (example: "see" listings in the "See" section) private string MatchListingTypeToSection(string templateCall, string sectionName, ref string summary) { string originalTemplateCall = templateCall; if (sectionName.ToLower() == "eat and drink" || sectionName.ToLower() == "see and do") { // skip these "combined" sections return templateCall; } string expectedListingType = GetValidListingTypeForSection(sectionName); string listingType = Tools.GetTemplateName(templateCall); if (!listingType.Equals(expectedListingType)) { templateCall = Tools.RenameTemplate(templateCall, expectedListingType, false); } if (!templateCall.Equals(originalTemplateCall)) { summary = 
UpdateEditSummary(summary, "update listing type to match expected section type"); } return templateCall; } // return a map of section name-section content for all second level headings. // the opening text of the article is returned without a section name private static List<KeyValuePair<string, string>> SplitToSecondLevelSections(string articleContent) { return SplitToHeadingSections(articleContent, WikiRegexes.HeadingLevelTwo); } private static List<KeyValuePair<string, string>> SplitToThirdLevelSections(string articleContent) { return SplitToHeadingSections(articleContent, WikiRegexes.HeadingLevelThree); } private static List<KeyValuePair<string, string>> SplitToHeadingSections(string articleContent, Regex headingRegex) { List<KeyValuePair<string, string>> sections = new List<KeyValuePair<string, string>>(); int lastmatchpos = 0; Match lastMatch = null; foreach(Match m in headingRegex.Matches(articleContent)) { if (m.Index > 0) { // Don't add empty first section if page starts with heading string sectionContent = articleContent.Substring(lastmatchpos, m.Index-lastmatchpos); string sectionName = (lastMatch != null) ? lastMatch.Groups[1].Value.Trim() : ""; sections.Add(new KeyValuePair<string, string>(sectionName, sectionContent)); } lastmatchpos = m.Index; lastMatch = m; } // Add text of final section string sectionContentLast = articleContent.Substring(lastmatchpos); string sectionNameLast = (lastMatch != null) ? 
lastMatch.Groups[1].Value : ""; sections.Add(new KeyValuePair<string, string>(sectionNameLast, sectionContentLast)); return sections; } private static bool IsEmptySecondLevelSection(string sectionContent) { return IsEmptySection(sectionContent, WikiRegexes.HeadingLevelTwo); } private static bool IsEmptyThirdLevelSection(string sectionContent) { return IsEmptySection(sectionContent, WikiRegexes.HeadingLevelThree); } private static bool IsEmptySection(string sectionContent, Regex headingRegex) { Match m = headingRegex.Match(sectionContent); if (!m.Success || m.Index != 0) { // this shouldn't happen, but just in case return false; } return (m.Value.Trim().Length == sectionContent.Trim().Length); } // return the expected listing type for the given section name ("See" returns "see"). private static string GetValidListingTypeForSection(string sectionName) { if (sectionName == "See") { return "see"; } if (sectionName == "Do") { return "do"; } if (sectionName == "Buy") { return "buy"; } if (sectionName == "Eat" || sectionName.ToLower() == "eat and drink") { return "eat"; } if (sectionName == "Drink") { return "drink"; } if (sectionName == "Sleep") { return "sleep"; } else { return "listing"; } } private static string SanitizePhoneNumber(string phoneNumberText) { // strip out the phone number, in case it is contained within other text // such as "888-888-8888 (front office)" Match match = PhoneNumberRegex.Match(phoneNumberText); if (match.Success) { string phoneNumber = match.Value; string originalPhoneNumber = match.Value; // remove invalid characters phoneNumber = phoneNumber.Replace("'", ""); phoneNumber = phoneNumber.Replace("(", " ").Trim(); phoneNumber = phoneNumber.Replace(")", " ").Trim(); // convert periods to dashes phoneNumber = phoneNumber.Replace(".", "-"); phoneNumber = ExcessWhitespaceRegex.Replace(phoneNumber, " "); // if there is a pattern like " -", "- ", "+ " left, replace the space phoneNumber = phoneNumber.Replace(" -", "-"); phoneNumber = 
phoneNumber.Replace("- ", "-"); phoneNumber = phoneNumber.Replace("+ ", "+"); // if the phone number starts with a 1, change it to +1 if (phoneNumber.StartsWith("1 ") || phoneNumber.StartsWith("1-")) { phoneNumber = "+" + phoneNumber; } phoneNumberText = phoneNumberText.Replace(originalPhoneNumber, phoneNumber); } return phoneNumberText; } // return the city from the article name. if the article name is "Foo (Disambiguation)" // then this method returns "Foo". private static string GetCityFromArticleName(string articleName) { string basePageName = Tools.BasePageName(articleName); Match match = CityNameWithDisambiguationRegex.Match(basePageName); return (match.Success) ? match.Groups[1].Value : basePageName; } // if the address ends in a full street type value, convert to the abbreviated value private static string AbbreviateStreeType(string address, string invalidStreetType, string validStreetType) { if (address.ToLower().EndsWith(" " + invalidStreetType.ToLower())) { int pos = address.ToLower().LastIndexOf(invalidStreetType.ToLower()); if (pos > 0) { address = address.Substring(0, pos) + validStreetType; } } return address; } // remove any leading or trailing punctuation private static string StripStrayPunctuation(string text) { return StripPunctuation(text, true, true); } // remove any leading punctuation private static string StripLeadingPunctuation(string text) { return StripPunctuation(text, true, false); } // remove any trailing punctuation private static string StripTrailingPunctuation(string text) { return StripPunctuation(text, false, true); } // remove any leading punctuation private static string StripPunctuation(string text, bool stripLeading, bool stripTrailing) { if (stripTrailing) { text = InvalidTrailingPunctuationRegex.Replace(text, ""); } if (stripLeading) { text = InvalidLeadingPunctuationRegex.Replace(text, ""); } return text; } // return true if the text matches the pattern, otherwise return null, the "match" // param will be populated with the 
// match object
    // (the Match for regex against matchText is always assigned to "match",
    // whether or not it succeeded)
    private static bool MatchText(string matchText, Regex regex, ref Match match) {
        match = regex.Match(matchText);
        return match.Success;
    }

    // return the text with its first character upper-cased; null and empty
    // input are returned unchanged
    private static string Capitalize(string text) {
        if (String.IsNullOrEmpty(text)) {
            return text;
        }
        // Substring(1) is "" for a one-character string, so no special case is needed
        return Char.ToUpper(text[0]) + text.Substring(1);
    }

    // add the value to the edit summary if it is not already present.
    // The summary is treated as a comma-separated list; a null or empty summary
    // yields just textToAdd.
    private static string UpdateEditSummary(string summary, string textToAdd) {
        if (String.IsNullOrEmpty(summary)) {
            return textToAdd;
        }
        foreach (string summaryField in summary.Split(',')) {
            if (summaryField.Trim().Equals(textToAdd)) {
                // text already present in edit summary
                return summary;
            }
        }
        return summary + ", " + textToAdd;
    }

    // append the value to the existing list as a CSV; a null or empty list
    // yields just valueToAdd
    private static string AppendCSV(string currentList, string valueToAdd) {
        if (String.IsNullOrEmpty(currentList)) {
            return valueToAdd;
        }
        return currentList + ", " + valueToAdd;
    }

    // return true if the article contains a city status template
    private static bool IsCityArticle(string articleText) {
        return CityStatusTemplateNamesRegex.IsMatch(articleText);
    }

    // return true if the article contains a country status template
    private static bool IsCountryArticle(string articleText) {
        return CountryStatusTemplateNamesRegex.IsMatch(articleText);
    }

    // return true if the article contains a dive guide status template
    private static bool IsDiveguideArticle(string articleText) {
        return DiveguideStatusTemplateNamesRegex.IsMatch(articleText);
    }

    // return true if the article contains a district status template
    private static bool IsDistrictArticle(string articleText) {
        return DistrictStatusTemplateNamesRegex.IsMatch(articleText);
    }

    // return true if the article contains an itinerary status template
    private static bool IsItineraryArticle(string articleText) {
        return ItineraryStatusTemplateNamesRegex.IsMatch(articleText);
    }

    // return true if the article contains a park status
// template
    private static bool IsParkArticle(string articleText) {
        bool hasParkStatus = ParkStatusTemplateNamesRegex.IsMatch(articleText);
        return hasParkStatus;
    }

    // true when a phrasebook status template occurs anywhere in the article text
    private static bool IsPhrasebookArticle(string articleText) {
        bool hasPhrasebookStatus = PhrasebookStatusTemplateNamesRegex.IsMatch(articleText);
        return hasPhrasebookStatus;
    }

    // true when a region status template occurs anywhere in the article text
    private static bool IsRegionArticle(string articleText) {
        bool hasRegionStatus = RegionStatusTemplateNamesRegex.IsMatch(articleText);
        return hasRegionStatus;
    }

    // true when a topic status template occurs anywhere in the article text
    private static bool IsTopicArticle(string articleText) {
        bool hasTopicStatus = TopicStatusTemplateNamesRegex.IsMatch(articleText);
        return hasTopicStatus;
    }

    // TODO:
    // - move tollfree numbers to tollfree in listings
    // - don't allow "otheruses" to be moved above the page banner