// implementing the "aggressive" rules is more likely to produce false positives
private static readonly bool AGGRESSIVE = true;

// names of the listing templates handled by this module
private static readonly Regex ListingTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("listing,see,do,buy,eat,drink,sleep".Split(',')));
// expected listing parameters, in the order in which they are written back out
private static readonly string[] ListingTemplateParamNames = {"type","name","alt","url","email","address","lat","long","directions","phone","tollfree","fax","image","hours","checkin","checkout","price","lastedit","content"};
private static readonly string[] ListingPhoneParamNames = {"phone","fax","tollfree"};
// second-level sections in which plain text list items may be converted to listings
private static readonly Regex TextToListingSectionNames = new Regex(@"(get in|get around|see|do|buy|eat|drink|sleep)", RegexOptions.IgnoreCase);
private static readonly Regex PhoneNumberRegex = new Regex(@"'*[\d\+\(][\s\d\(\)\-\.'\+]+[\d\)]'*");
private static readonly Regex MailtoRegex = new Regex(@"mailto:[/]*", RegexOptions.IgnoreCase);
private static readonly Regex ExcessWhitespaceRegex = new Regex(@"\s\s+", RegexOptions.Singleline);
// "http://www.example.com"
private static readonly string ValidUrlPattern = @"((http(s)?:)//)?(([a-z]+\.)+)([a-z]+)";
private static readonly Regex ValidUrlRegex = new Regex(ValidUrlPattern, RegexOptions.Singleline | RegexOptions.IgnoreCase);
// "[http://www.example.com]"
private static readonly Regex FootnoteUrlRegex = new Regex(@"\[(" + ValidUrlPattern + @"([^\]\s]+))\]", RegexOptions.Singleline);
// "Foo (disambiguation)"
private static readonly Regex CityNameWithDisambiguationRegex = new Regex(@"([^\(]+) \([^\)]+\)", RegexOptions.Singleline);
// "blah, CA 99999", "blah, CA 99999-1234", "blah CA", etc
private static readonly Regex AddressWithStateOrZipRegex = new Regex(@"(.+)[\.,\-]+\s*(AL|alabama|AK|alaska|AZ|arizona|AR|arkansas|CA|california|CO|colorado|CT|connecticut|DC|DE|delaware|FL|florida|GA|georgia|HI|hawaii|ID|idaho|IL|illinois|IN|indiana|IA|iowa|KS|kansas|KY|kentucky|LA|louisiana|ME|maine|MD|maryland|MA|massachusetts|MI|michigan|MN|minnesota|MS|mississippi|MO|missouri|MT|montana|NE|nebraska|NV|nevada|NH|new hampshire|NJ|new jersey|NM|new mexico|NY|new york|NC|north carolina|ND|north dakota|OH|ohio|OK|oklahoma|OR|oregon|PA|pennsylvania|RI|rhode island|SC|south carolina|SD|south dakota|TN|tennessee|TX|texas|UT|utah|VT|vermont|VA|virginia|WA|washington|WV|west virginia|WI|wisconsin|WY|wyoming)([\s,\-]*[0-9]{5}(\-[0-9]{4})?)?$", RegexOptions.Singleline | RegexOptions.IgnoreCase);
// "* ", "** ", etc
private static readonly Regex EmptyListItemRegex = new Regex(@"^\*+\s*\n", RegexOptions.Multiline);
// "Example Name [http://www.example.com]"; groups used by the replacement:
// $1/$5 = surrounding quote marks, $2 = capitalized name, $6 = url
private static readonly Regex FootnoteToFrontLinkRegex = new Regex(@"('*)((?!The )\p{Lu}[\w\-'/]*[\w]( (and|del|de|of|&|the|la|le|for|\p{Lu}[\w\-'/]*[\w]))*)('*)[, ]*\[(http[^ ]+)( )*\]");
// "9.30 am" -- hour and minutes separated by a period
private static readonly Regex TimeValuesShouldUseColonAsSeperator = new Regex(@"\b([1-9]|10|11|12)\.([0-5][0-9])([ ]*)((a|p)\.?\s*m\.|(a|p)\.?\s*m\b)", RegexOptions.IgnoreCase);
// "9 a.m.", "9:30 am"
private static readonly Regex TimeValuesSuffixAM = new Regex(@"\b([1-9]|10|11|12)(:[0-5][0-9])*[ ]*(a\.\s*m\.|a\s*m\b)", RegexOptions.IgnoreCase);
// "9 p.m.", "9:30 pm"
private static readonly Regex TimeValuesSuffixPM = new Regex(@"\b([1-9]|10|11|12)(:[0-5][0-9])*[ ]*(p\.\s*m\.|p\s*m\b)", RegexOptions.IgnoreCase);
// "9:00AM"
private static readonly Regex TimeValuesRoundToHour = new Regex(@"\b([1-9]|10|11|12):00(AM|PM)\b", RegexOptions.IgnoreCase);
private static readonly Regex TimeValuesHourToNoon = new Regex(@"\b(12PM)\b", RegexOptions.IgnoreCase);
private static readonly Regex TimeValuesHourToMidnight = new Regex(@"\b(12AM)\b", RegexOptions.IgnoreCase);
// "9AM - 5PM"; groups used by the replacement: $1 = start time, $6 = dash, $7 = end time
private static readonly Regex TimeValuesTrimWhitespace = new Regex(@"\b((([1-9]|10|11|12)(:[0-5][0-9])*(AM|PM))|noon|midnight)\s*(\-)\s*((([1-9]|10|11|12)(:[0-5][0-9])*(AM|PM))|noon|midnight)\b", RegexOptions.IgnoreCase);
// "[http://en.wikipedia.org/wiki/Foo link text]" and the same without link text
private static readonly Regex ExternalToInternalLinkWikipedia = new Regex(@"\[http[s]?://en.wikipedia.org/wiki/([^\] ]+)[ ]+([^\]]+)]", RegexOptions.IgnoreCase);
private static readonly Regex ExternalToInternalLinkWikipediaNoText = new Regex(@"\[http[s]?://en.wikipedia.org/wiki/([^\] ]+)[ ]*]", RegexOptions.IgnoreCase);
private static readonly Regex ExternalToInternalLinkWikivoyage = new Regex(@"\[http[s]?://en.wikivoyage.org/wiki/([^\] ]+)[ ]+([^\]]+)]", RegexOptions.IgnoreCase);
private static readonly Regex ExternalToInternalLinkWikivoyageNoText = new Regex(@"\[http[s]?://en.wikivoyage.org/wiki/([^\] ]+)[ ]*]", RegexOptions.IgnoreCase);
// "* blah blah blah", "** blah blah blah", etc
private static readonly Regex CandidateListingItemRegex = new Regex(@"^\*+\s*([^\{]{2}.+)$", RegexOptions.Multiline);
// "'''Listing Name'''", optionally preceded by "the"; group 2 is the name
private static readonly Regex ListingNameRegex = new Regex(@"(the\s+)*'''(.{3,}?)'''", RegexOptions.IgnoreCase);
// "[http://www.example.com/ Example Text]"
private static readonly Regex ExternalLinkWithTextRegex = new Regex(@"\[(http[^\]\s]+)\s+([^\]]+)\]", RegexOptions.IgnoreCase);
// "123-456-7890"
private static readonly string ListingPhoneNumber = @"((''|\+|\()*\d+(''|\)*)[\s\-]+)+(''|\+|\()*\d+(''|\)*)( ext\.? \d+)?";
private static readonly Regex ListingPhoneNumberRegex = new Regex(ListingPhoneNumber, RegexOptions.IgnoreCase);
// "fax: 123-456-7890" (group 4 is the number)
private static readonly string ListingFaxNumber = @"(\(''|''\(|\()?fax(:)?(\s)*(" + ListingPhoneNumber + @")(''\)|\)''|\))?";
private static readonly Regex ListingFaxNumberRegex = new Regex(ListingFaxNumber, RegexOptions.IgnoreCase);
// "telephone: 123-456-7890" and similar (group 4 is the number).
// The label alternatives previously contained "call:telephone:" (a missing '|'), so
// "telephone: ..." was only matched from "phone:" onwards and "tele" was left behind.
private static readonly string ListingPhoneNumberWithLabel = @"(\(''|''\(|\()?(\u260e|call:|call|telephone:|telephone|tel:|tel\.:|tel\.|tel|phone:|phone|ph:|ph\.:|ph\.|ph|\u260E)(\s)*(" + ListingPhoneNumber + @")(''\)|\)''|\))?";
private static readonly Regex ListingPhoneNumberWithLabelRegex = new Regex(ListingPhoneNumberWithLabel, RegexOptions.IgnoreCase);
// "toll-free: 123-456-7890" and similar (group 5 is the number)
private static readonly string ListingTollfreeNumber = @"(\(''|''\(|\()?(toll[ \-]?free)(:)?(\s)*(" + ListingPhoneNumber + @")(''\)|\)''|\))?";
private static readonly Regex ListingTollfreeNumberRegex = new Regex(ListingTollfreeNumber, RegexOptions.IgnoreCase);
private static readonly string InvalidLeadingOrTrailingPunctuation = @",|\-|\*|\:|\–|;";
private static readonly string InvalidLeadingPunctuation = @"\.|!|\?|\)|\]|\}|&mdash;|;";
private static readonly string InvalidTrailingPunctuation = @"\(|\[|\{|;";
private static readonly string InvalidDuplicatePunctuation = @"\.|!|\?|" + InvalidLeadingOrTrailingPunctuation;
private static readonly Regex InvalidLeadingPunctuationRegex = new Regex(@"^(\s|" + InvalidLeadingPunctuation + "|" + InvalidLeadingOrTrailingPunctuation + @")+", RegexOptions.Multiline);
private static readonly Regex InvalidTrailingPunctuationRegex = new Regex(@"(\s|" + InvalidTrailingPunctuation + "|" + InvalidLeadingOrTrailingPunctuation + @")+$", RegexOptions.Multiline);
private static readonly Regex InvalidDuplicatePunctuationRegex = new Regex(@"(" + InvalidDuplicatePunctuation + @"|\s)+(" + InvalidDuplicatePunctuation + @")");
// "email: user@example.com" OR "mailto:user@example.com" OR "user@example.com"
private static readonly string ListingEmail = @"(mailto:|e-mail:|email:)?\s*(\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b)";
private static readonly Regex ListingEmailRegex = new Regex(ListingEmail, RegexOptions.IgnoreCase);
// "1234 First St" (group 1 is the address without trailing punctuation)
private static readonly string ListingAddress = @"([\d]+([/\-][\d]+)? ([nesw]\.? )?([\p{L}\d]+[ \.\-]*){1,3} (avenue|ave|av|boulevard|blvd|court|ct|drive|dr|expressway|expwy|freeway|fwy|highway( \d{1,3})?|hwy( \d{1,3})?|lane|ln|loop|parkway|pkwy|place|pl|road|rd|row|street|st|way)\b(\.? (#(\s)*[\d]+|north|ne|nw|n|east|e|south|se|sw|s|west|w)\b)?)[\. , ]*";
private static readonly Regex ListingAddressRegex = new Regex(ListingAddress, RegexOptions.IgnoreCase);
// the above pattern will match things like "25 km by road", so add a pattern to catch those
private static readonly string ListingAddressFalsePositives = @"\b(km|kilometer|kilometers|mi|mile|miles)\b";
private static readonly Regex ListingAddressFalsePositivesRegex = new Regex(ListingAddressFalsePositives, RegexOptions.IgnoreCase);
// "Calle Ricardo Montalban, 452" (group 1 is only the street type; use the whole match for the address)
private static readonly string ListingAddressInternational = @"(avenida|ave|av|calle|estrada|est|rua)(\.)? ([\p{L}\d]+[ \.\-]*){1,3}, [\d]+([/\-][\d]+)?";
private static readonly Regex ListingAddressInternationalRegex = new Regex(ListingAddressInternational, RegexOptions.IgnoreCase);
// NOTE(review): the empty alternative after "on|" makes this match any line that starts
// with whitespace; it looks like a typo, but the usage is not visible here -- confirm before changing
private static readonly Regex ListingAddressIsDirectionsRegex = new Regex(@"^(between|corner|end|next|on|)\s", RegexOptions.Multiline);
// article status templates, one regex per article type
private static readonly Regex CityStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlinecity,usablecity,guidecity,starcity".Split(',')));
private static readonly Regex CountryStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlinecountry,usablecountry,guidecountry,starcountry".Split(',')));
private static readonly Regex DiveguideStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlinediveguide,usablediveguide,guidediveguide,stardiveguide".Split(',')));
private static readonly Regex DistrictStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlinedistrict,usabledistrict,guidedistrict,stardistrict".Split(',')));
private static readonly Regex ItineraryStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlineitinerary,usableitinerary,guideitinerary,staritinerary".Split(',')));
private static readonly Regex ParkStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlinepark,usablepark,guidepark,starpark".Split(',')));
private static readonly Regex PhrasebookStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlinephrasebook,usablephrasebook,guidephrasebook,starphrasebook".Split(',')));
private static readonly Regex RegionStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlineregion,usableregion,guideregion,starregion".Split(',')));
private static readonly Regex TopicStatusTemplateNamesRegex = Tools.NestedTemplateRegex(new List<string>("outlinetopic,usabletopic,guidetopic,startopic".Split(',')));
// "| alt ="
private static readonly Regex ListingAltParamRegex = new Regex(@"\|\s*alt\s*=", RegexOptions.Singleline);
// "()", "[]", "{}"
private static readonly Regex EmptyPunctuationRegex = new Regex(@"(\(\s*\)|\[\s*\]|\{\s*\})");
private static readonly Regex OnlyPunctuationRegex = new Regex(@"^['\-\(\)\[\]\{\}\*\.\?!\s]+$", RegexOptions.Singleline);
// match a single digit
private static readonly Regex DigitRegex = new Regex(@"\d");
private static readonly Regex DoubleSpaceRegex = new Regex(@"\s{2,}");
// headings that are removed when their section is empty
private static readonly Regex InvalidEmptySecondLevelCityHeading = new Regex(@"(cope|learn|respect|stay safe|stay healthy|talk|work)", RegexOptions.IgnoreCase);
private static readonly Regex InvalidEmptySecondLevelRegionHeading = new Regex(@"(talk|regions)", RegexOptions.IgnoreCase);
private static readonly Regex InvalidEmptyThirdLevelCityHeading = new Regex(@"(by(\s+\w)+)", RegexOptions.IgnoreCase);
private static readonly Regex InvalidEmptyThirdLevelRegionHeading = new Regex(@"(itineraries)", RegexOptions.IgnoreCase);
// non-standard second-level heading (matched against the lower-cased title) -> replacement title
private static readonly Dictionary<Regex, string> InvalidSecondLevelHeadings = new Dictionary<Regex, string> {
	{new Regex(@"^(know|information)$", RegexOptions.IgnoreCase), "Understand"},
	{new Regex(@"^(get in|getting in|getting there)$", RegexOptions.IgnoreCase), "Get in"},
	{new Regex(@"^(get around|getting around)$", RegexOptions.IgnoreCase), "Get around"},
	{new Regex(@"^(sights)$", RegexOptions.IgnoreCase), "See"},
	{new Regex(@"^(activities)$", RegexOptions.IgnoreCase), "Do"},
	{new Regex(@"^(shopping|shops|shop)$", RegexOptions.IgnoreCase), "Buy"},
	{new Regex(@"^(restaurants|dining)$", RegexOptions.IgnoreCase), "Eat"},
	{new Regex(@"^(bars|nightlife)$", RegexOptions.IgnoreCase), "Drink"},
	{new Regex(@"^(accommodation|hotels|stay)$", RegexOptions.IgnoreCase), "Sleep"},
	{new Regex(@"^(stay healthy)$", RegexOptions.IgnoreCase), "Stay healthy"},
	{new Regex(@"^(stay safe|safety)$", RegexOptions.IgnoreCase), "Stay safe"},
	{new Regex(@"^(go next|get out|nearby)$", RegexOptions.IgnoreCase), "Go next"}
};
private static readonly Regex MidrangeHeadingRegex = new Regex(@"^(mid[ \-]*range|moderate)", RegexOptions.IgnoreCase);
private static readonly Regex ByOnHeadingRegex = new Regex(@"^(by|on) (.+)", RegexOptions.IgnoreCase);
// typographic symbol -> plain wiki text replacement
private static readonly Dictionary<Regex, string> InvalidSymbols = new Dictionary<Regex, string> {
	{new Regex(@"^(•)", RegexOptions.Multiline), "*"},
	{new Regex(@"(“|”)"), "\""},
	{new Regex(@"(’|‘)"), "'"},
	{new Regex(@"…"), "..."},
	{new Regex(@"(®|©|™)"), ""}
};
// "www.example.com" not preceded by a slash
private static readonly Regex NoHttpUrlRegex = new Regex(@"([^/])(www\.[a-z0-9\-]+\.[a-z0-9\-]+)", RegexOptions.IgnoreCase);
// "-123.1234567", "-123.1234567890"
private static readonly Regex LatLongTrimRegex = new Regex(@"((\-)?[0-9]{1,3}\.[0-9]{8})([0-9]+)");

// Entry point: runs each cleanup pass over the article text in a fixed order.
// "summary" accumulates the edit summary built by the individual passes and
// "skip" is set to true when no pass changed the text.
public string ProcessArticle(string articleText, string articleTitle, int wikiNamespace, out string summary, out bool skip) {
	string originalText = articleText;
	summary = "";
	skip = false;
	articleText = ReplaceInvalidSymbols(articleText, articleTitle, wikiNamespace, ref summary, ref skip);
	articleText = FixInvalidUrls(articleText, articleTitle, wikiNamespace, ref summary, ref skip);
	articleText = UpdateHeadings(articleText, articleTitle, wikiNamespace, ref summary, ref skip);
	articleText = RemoveEmptyListings(articleText, articleTitle, wikiNamespace, ref summary, ref skip);
	articleText = RemoveEmptyObsoleteHeadings(articleText, articleTitle, wikiNamespace, ref summary, ref skip);
	articleText = ExternalToInternalLink(articleText, articleTitle, wikiNamespace, ref summary, ref skip);
	articleText = FootnoteToFrontlink(articleText, articleTitle, wikiNamespace, ref summary, ref skip);
	articleText = FormatTimeValues(articleText, articleTitle, wikiNamespace, ref summary, ref skip);
	if (AGGRESSIVE) {
		articleText = ConvertTextToListings(articleText, articleTitle, wikiNamespace, ref summary, ref skip);
	}
	articleText = FormatListings(articleText, articleTitle, wikiNamespace, ref summary, ref skip);
	if (articleText.Equals(originalText)) {
		skip = true;
	}
	return articleText;
}

// replace symbols with the appropriate wiki text equivalent
// (this pass does not add anything to the edit summary)
private string ReplaceInvalidSymbols(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
	foreach (KeyValuePair<Regex, string> invalidSymbolEntry in InvalidSymbols) {
		articleText = invalidSymbolEntry.Key.Replace(articleText, invalidSymbolEntry.Value);
	}
	return articleText;
}

// replace URLs of the form "www.example.com" with "http://www.example.com"
private string FixInvalidUrls(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
	string originalText = articleText;
	// $1 is the single character preceding "www.", which must be kept
	articleText = NoHttpUrlRegex.Replace(articleText, "$1http://$2");
	if (!articleText.Equals(originalText)) {
		summary = UpdateEditSummary(summary, "fix URL(s) missing 'http'");
	}
	return articleText;
}

// ensure that headings match the article templates
private string UpdateHeadings(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
	foreach (Match m in WikiRegexes.Headings.Matches(articleText)) {
		string originalHeading = m.Value;
		string originalTitle = m.Groups[1].Value;
		string newTitle = originalTitle;
		string lowerTitle = originalTitle.ToLower().Trim();
		// count the leading '=' characters; the bounds check guards against a
		// heading made up entirely of '=' characters
		int headingLevel = 1;
		while (headingLevel < originalHeading.Length && originalHeading[headingLevel] == '=') {
			headingLevel++;
		}
		Match match = null;
		if (headingLevel == 2) {
			foreach (KeyValuePair<Regex, string> invalidHeadingEntry in InvalidSecondLevelHeadings) {
				if (MatchText(lowerTitle, invalidHeadingEntry.Key, ref match)) {
					newTitle = invalidHeadingEntry.Value;
				}
			}
		} else {
			if (MatchText(lowerTitle, MidrangeHeadingRegex, ref match)) {
				newTitle = "Mid-range";
			} else if (MatchText(lowerTitle, ByOnHeadingRegex, ref match)) {
				newTitle = Capitalize(match.Groups[1].Value) + " " + match.Groups[2].Value.ToLower();
			}
		}
		if (!newTitle.Equals(originalTitle)) {
			string headingBars = originalHeading.Substring(0, headingLevel);
			articleText = articleText.Replace(originalHeading, headingBars + newTitle + headingBars);
			summary = UpdateEditSummary(summary, "'" + originalTitle.Trim() + "' &rarr; '" + newTitle + "' per [[WV:AT]]");
		}
	}
	return articleText;
}

// Remove any listing templates in which all fields are empty
private string RemoveEmptyListings(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
	foreach (Match m in ListingTemplateNamesRegex.Matches(articleText)) {
		string templateCall = m.Value;
		bool listingIsEmpty = true;
		foreach (KeyValuePair<string, string> templateParameter in Tools.GetTemplateParameterValues(templateCall)) {
			if (templateParameter.Value != "") {
				listingIsEmpty = false;
				break;
			}
		}
		if (listingIsEmpty) {
			articleText = articleText.Replace(templateCall, "");
			summary = UpdateEditSummary(summary, "empty listing(s) removed");
		}
	}
	// strip off any list items that are now empty as a result
	articleText = EmptyListItemRegex.Replace(articleText, "");
	return articleText;
}

// remove obsolete headings if they have no content
// (only applies to region, city and district articles)
private string RemoveEmptyObsoleteHeadings(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
	bool regionArticle = IsRegionArticle(articleText);
	bool cityArticle = IsCityArticle(articleText) || IsDistrictArticle(articleText);
	if (!regionArticle && !cityArticle) {
		return articleText;
	}
	string emptyRegionHeadings = "";
	string emptyCityHeadings = "";
	foreach (KeyValuePair<string, string> levelTwoSectionData in SplitToSecondLevelSections(articleText)) {
		string levelTwoSectionName = levelTwoSectionData.Key;
		string levelTwoSectionText = levelTwoSectionData.Value;
		if (regionArticle) {
			Match m = InvalidEmptySecondLevelRegionHeading.Match(levelTwoSectionName);
			if (m.Success && IsEmptySecondLevelSection(levelTwoSectionText)) {
				articleText = articleText.Replace(levelTwoSectionText, "");
				emptyRegionHeadings = AppendCSV(emptyRegionHeadings, "'" + levelTwoSectionName + "'");
				continue;
			}
		}
		if (cityArticle) {
			Match m = InvalidEmptySecondLevelCityHeading.Match(levelTwoSectionName);
			if (m.Success && IsEmptySecondLevelSection(levelTwoSectionText)) {
				articleText = articleText.Replace(levelTwoSectionText, "");
				emptyCityHeadings = AppendCSV(emptyCityHeadings, "'" + levelTwoSectionName + "'");
				continue;
			}
		}
		foreach (KeyValuePair<string, string> levelThreeSectionData in SplitToThirdLevelSections(levelTwoSectionText)) {
			string levelThreeSectionName = levelThreeSectionData.Key.Trim();
			string levelThreeSectionText = levelThreeSectionData.Value;
			// the region-only check is guarded like the other three checks, so that a
			// city article is never edited with a "Region article template" summary
			if (regionArticle) {
				Match m = InvalidEmptyThirdLevelRegionHeading.Match(levelThreeSectionName);
				if (m.Success && IsEmptyThirdLevelSection(levelThreeSectionText)) {
					articleText = articleText.Replace(levelThreeSectionText, "");
					emptyRegionHeadings = AppendCSV(emptyRegionHeadings, "'" + levelThreeSectionName + "'");
					continue;
				}
			}
			if (cityArticle) {
				Match m = InvalidEmptyThirdLevelCityHeading.Match(levelThreeSectionName);
				if (m.Success && IsEmptyThirdLevelSection(levelThreeSectionText)) {
					articleText = articleText.Replace(levelThreeSectionText, "");
					emptyCityHeadings = AppendCSV(emptyCityHeadings, "'" + levelThreeSectionName + "'");
					continue;
				}
			}
		}
	}
	if (!String.IsNullOrEmpty(emptyRegionHeadings)) {
		summary = UpdateEditSummary(summary, "remove empty " + emptyRegionHeadings + " heading(s) per [[WV:Region article template]]");
	}
	if (!String.IsNullOrEmpty(emptyCityHeadings)) {
		summary = UpdateEditSummary(summary, "remove empty " + emptyCityHeadings + " heading(s) per [[WV:Huge city article template]]");
	}
	return articleText;
}

// convert footnote links to frontlinks
private string FootnoteToFrontlink(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
	string originalText = articleText;
	// "Name [url]" -> "[url Name]", keeping any quote marks around the name
	articleText = FootnoteToFrontLinkRegex.Replace(articleText, "$1[$6 $2]$5");
	if (!articleText.Equals(originalText)) {
		summary = UpdateEditSummary(summary, "footnote &rarr; frontlink per [[WV:XL]]");
	}
	return articleText;
}

// convert external links to interwiki/internal links
private string ExternalToInternalLink(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
	string originalText = articleText;
	articleText = ExternalToInternalLinkWikipedia.Replace(articleText, "[[w:$1|$2]]");
	articleText = ExternalToInternalLinkWikipediaNoText.Replace(articleText, "[[w:$1]]");
	articleText = ExternalToInternalLinkWikivoyage.Replace(articleText, "[[$1|$2]]");
	articleText = ExternalToInternalLinkWikivoyageNoText.Replace(articleText, "[[$1]]");
	if (!articleText.Equals(originalText)) {
		summary = UpdateEditSummary(summary, "external &rarr; internal link(s)");
	}
	return articleText;
}

// normalize time values, e.g. "9.30 a.m." -> "9:30AM", "9:00AM" -> "9AM",
// "12PM" -> "noon", "12AM" -> "midnight", "9AM - 5PM" -> "9AM-5PM".
// The order of the replacements matters: later patterns expect the output of earlier ones.
private string FormatTimeValues(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
	string originalText = articleText;
	articleText = TimeValuesShouldUseColonAsSeperator.Replace(articleText, "$1:$2$3$4");
	articleText = TimeValuesSuffixAM.Replace(articleText, "$1$2AM");
	articleText = TimeValuesSuffixPM.Replace(articleText, "$1$2PM");
	articleText = TimeValuesRoundToHour.Replace(articleText, "$1$2");
	articleText = TimeValuesHourToNoon.Replace(articleText, "noon");
	articleText = TimeValuesHourToMidnight.Replace(articleText, "midnight");
	articleText = TimeValuesTrimWhitespace.Replace(articleText, "$1$6$7");
	if (!articleText.Equals(originalText)) {
		summary = UpdateEditSummary(summary, "update time(s) per [[WV:TDF]]");
	}
	return articleText;
}

// convert plain text list items that start with a bolded name into listing templates
private string ConvertTextToListings(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
	if (!IsCityArticle(articleText) && !IsDistrictArticle(articleText) && !IsParkArticle(articleText)) {
		// do not try to convert text to listings for non-city or park articles
		return articleText;
	}
	// loop through all sections
	int count = 0;
	foreach (KeyValuePair<string, string> sectionData in SplitToSecondLevelSections(articleText)) {
		string sectionName = sectionData.Key;
		Match sectionNameMatch = TextToListingSectionNames.Match(sectionName);
		if (!sectionNameMatch.Success) {
			// only convert text that's in a section that supports non-generic listings
			continue;
		}
		string listingType = GetValidListingTypeForSection(sectionName);
		string sectionText = sectionData.Value;
		// get all list items within the section
		foreach (Match m in CandidateListingItemRegex.Matches(sectionText)) {
			string listItemText = m.Groups[1].Value;
			string templateCall = ConvertListingItemtoTemplatedListing(listItemText, listingType, ref summary);
			if (templateCall != "") {
				articleText = articleText.Replace(listItemText, templateCall);
				count++;
			}
		}
	}
	if (count > 0) {
		string plural = "";
		if (count > 1) {
			plural = "s";
		}
		summary = UpdateEditSummary(summary, "convert " + count + " plain text listing" + plural + " to [[WV:Listings|templated listing" + plural + "]]");
	}
	return articleText;
}

// build a "{{listingType|...}}" template call from a plain text list item;
// returns "" when the item does not start with a listing name
private string ConvertListingItemtoTemplatedListing(string listItemText, string listingType, ref string summary) {
	string templateCall = "{{" + listingType + "}}";
	if (!ProcessListingNameInListItem(ref listItemText, ref templateCall)) {
		// if we don't have a listing name don't bother trying to convert anything else
		return "";
	}
	// each step moves one recognized value out of the remaining text and into the template
	ProcessListingPhoneInListItem(ref listItemText, ref templateCall);
	ProcessListingEmailInListItem(ref listItemText, ref templateCall);
	ProcessListingUrlInListItem(ref listItemText, ref templateCall);
	ProcessListingAddressInListItem(ref listItemText, ref templateCall);
	ProcessListingContentInListItem(ref listItemText, ref templateCall);
	return templateCall;
}

// extract the bolded listing name (and url, if the name is an external link);
// returns false when the list item does not begin with a name
private bool ProcessListingNameInListItem(ref string listItemText, ref string templateCall) {
	Match m = ListingNameRegex.Match(listItemText);
	if (!m.Success || m.Index != 0) {
		// if there isn't a name at the beginning of the listing, don't convert
		return false;
	}
	string name = m.Groups[2].Value.Trim();
	templateCall = Tools.SetTemplateParameterValue(templateCall, "name", name);
	// see if the name is front-linked
	Match urlMatch = ExternalLinkWithTextRegex.Match(name);
	if (urlMatch.Success && urlMatch.Index == 0) {
		// split the name & url fields
		string url = urlMatch.Groups[1].Value;
		templateCall = Tools.SetTemplateParameterValue(templateCall, "url", url);
		name = urlMatch.Groups[2].Value;
		templateCall = Tools.SetTemplateParameterValue(templateCall, "name", name);
	}
	// remove name from list item text
	listItemText = RemoveValueFromListItemText(listItemText, m.Value);
	return true;
}

// extract labelled phone, fax and toll-free numbers, falling back to an
// unlabelled number for the phone field
private void ProcessListingPhoneInListItem(ref string listItemText, ref string templateCall) {
	Match phoneMatch = ListingPhoneNumberWithLabelRegex.Match(listItemText);
	if (phoneMatch.Success) {
		string phone = phoneMatch.Groups[4].Value;
		templateCall = Tools.SetTemplateParameterValue(templateCall, "phone", phone);
		listItemText = RemoveValueFromListItemText(listItemText, phoneMatch.Value);
	}
	Match faxMatch = ListingFaxNumberRegex.Match(listItemText);
	if (faxMatch.Success) {
		string fax = faxMatch.Groups[4].Value;
		templateCall = Tools.SetTemplateParameterValue(templateCall, "fax", fax);
		listItemText = RemoveValueFromListItemText(listItemText, faxMatch.Value);
	}
	Match tollfreeMatch = ListingTollfreeNumberRegex.Match(listItemText);
	if (tollfreeMatch.Success) {
		string tollfree = tollfreeMatch.Groups[5].Value;
		templateCall = Tools.SetTemplateParameterValue(templateCall, "tollfree", tollfree);
		listItemText = RemoveValueFromListItemText(listItemText, tollfreeMatch.Value);
	}
	if (!phoneMatch.Success) {
		// try to find a phone number without a label
		phoneMatch = ListingPhoneNumberRegex.Match(listItemText);
		if (phoneMatch.Success) {
			string phone = phoneMatch.Value;
			if (DigitRegex.Matches(phone).Count > 6) {
				// only consider a phone number valid if it contains more than six digits
				templateCall = Tools.SetTemplateParameterValue(templateCall, "phone", phone);
				listItemText = RemoveValueFromListItemText(listItemText, phoneMatch.Value);
			}
		}
	}
}

// extract the first email address (group 2 excludes any "email:" / "mailto:" label)
private void ProcessListingEmailInListItem(ref string listItemText, ref string templateCall) {
	Match emailMatch = ListingEmailRegex.Match(listItemText);
	if (emailMatch.Success) {
		string email = emailMatch.Groups[2].Value;
		templateCall = Tools.SetTemplateParameterValue(templateCall, "email", email);
		listItemText = RemoveValueFromListItemText(listItemText, emailMatch.Value);
	}
}

// extract a footnote-style url ("[http://www.example.com]") unless a url is already set
private void ProcessListingUrlInListItem(ref string listItemText, ref string templateCall) {
	if (Tools.GetTemplateParameterValue(templateCall, "url") != "") {
		// url was already set when processing listing name
		return;
	}
	Match urlMatch = FootnoteUrlRegex.Match(listItemText);
	if (urlMatch.Success) {
		string url = urlMatch.Groups[1].Value;
		templateCall = Tools.SetTemplateParameterValue(templateCall, "url", url);
		listItemText = RemoveValueFromListItemText(listItemText, urlMatch.Value);
	}
}

// extract a street address, trying the "1234 First St" form first and the
// "Calle Ricardo Montalban, 452" form second
private void ProcessListingAddressInListItem(ref string listItemText, ref string templateCall) {
	Match addressMatch = ListingAddressRegex.Match(listItemText);
	if (addressMatch.Success) {
		string address = addressMatch.Groups[1].Value;
		Match falsePositiveMatch = ListingAddressFalsePositivesRegex.Match(address);
		if (!falsePositiveMatch.Success) {
			templateCall = Tools.SetTemplateParameterValue(templateCall, "address", address);
			listItemText = RemoveValueFromListItemText(listItemText, addressMatch.Value);
		}
	} else {
		addressMatch = ListingAddressInternationalRegex.Match(listItemText);
		if (addressMatch.Success) {
			// group 1 of the international pattern is only the street type ("Calle"),
			// so the whole match must be used as the address
			string address = addressMatch.Value;
			templateCall = Tools.SetTemplateParameterValue(templateCall, "address", address);
			listItemText = RemoveValueFromListItemText(listItemText, addressMatch.Value);
		}
	}
}

// whatever text is left over after the other fields were extracted becomes the listing content
private void ProcessListingContentInListItem(ref string listItemText, ref string templateCall) {
	if (String.IsNullOrEmpty(listItemText)) {
		return;
	}
	listItemText = SanitizeListingContent(listItemText);
	templateCall = Tools.SetTemplateParameterValue(templateCall, "content", listItemText);
}

// remove an extracted value from the list item text and tidy up the
// punctuation that surrounded it
private string RemoveValueFromListItemText(string listItemText, string value) {
	listItemText = listItemText.Replace(value, "").Trim();
	listItemText = InvalidDuplicatePunctuationRegex.Replace(listItemText, "$1");
	listItemText = StripLeadingPunctuation(listItemText).Trim();
	return listItemText;
}

// Perform various tasks on listings to ensure params and other values
// are formatted correctly.
private string FormatListings(string articleText, string articleTitle, int wikiNamespace, ref string summary, ref bool skip) {
	// loop through all sections
	foreach (KeyValuePair<string, string> sectionData in SplitToSecondLevelSections(articleText)) {
		string sectionName = sectionData.Key;
		string sectionText = sectionData.Value;
		// get all listing tags within the section
		foreach (Match m in ListingTemplateNamesRegex.Matches(sectionText)) {
			string templateCall = m.Value;
			string originalTemplateCall = m.Value;
			templateCall = ConvertGenericListingToSpecificType(templateCall, ref summary);
			templateCall = MatchListingTypeToSection(templateCall, sectionName, ref summary);
			templateCall = FormatListingParams(templateCall, ref summary);
			templateCall = SanitizeListingPhoneNumbers(templateCall, ref summary);
			templateCall = SanitizeListingEmail(templateCall, ref summary);
			templateCall = SanitizeListingLatLong(templateCall, ref summary);
			templateCall = SanitizeListingUrl(templateCall, ref summary);
			templateCall = SanitizeListingContent(templateCall, ref summary);
			if (AGGRESSIVE) {
				templateCall = SanitizeListingAddress(templateCall, articleTitle, ref summary);
			}
			if (!templateCall.Equals(originalTemplateCall)) {
				articleText = articleText.Replace(originalTemplateCall, templateCall);
			}
		}
	}
	return articleText;
}

// change "{{listing|type=xyz|...}}" to "{{xyz|...}}"
private string ConvertGenericListingToSpecificType(string templateCall, ref string summary) {
	string originalTemplateCall = templateCall;
	string listingType = Tools.GetTemplateName(templateCall);
	if (listingType.Equals("listing")) {
		string templateType = Tools.GetTemplateParameterValue(templateCall, "type").ToLower();
		if (templateType.Equals("see") || templateType.Equals("do") || templateType.Equals("buy") || templateType.Equals("eat") || templateType.Equals("drink") || templateType.Equals("sleep")) {
			templateCall = Tools.RenameTemplate(templateCall, templateType);
			templateCall = Tools.RemoveTemplateParameter(templateCall, "type");
		}
	}
	if (!templateCall.Equals(originalTemplateCall)) {
		summary = UpdateEditSummary(summary, "update listing type to match expected section type");
	}
	return templateCall;
}

// make sure listings are formatted according to the style guidelines in
// Wikivoyage:Listings
private string FormatListingParams(string templateCall, ref string summary) {
	string originalTemplateCall = templateCall;
	string listingType = Tools.GetTemplateName(templateCall);
	string formattedValue = "{{" + listingType + "\n";
	// loop through expected template arguments and format appropriately
	foreach(string param in ListingTemplateParamNames) {
		if (param.Equals("content")) {
			continue;
		}
		string paramValue = Tools.GetTemplateParameterValue(templateCall, param);
		if (param.Equals("type") && (paramValue == "" || !listingType.Equals("listing"))) {
			// only listing uses the "type" attribute
			if (paramValue == "") {
				templateCall = Tools.RemoveTemplateParameter(templateCall, param);
			}
			continue;
		}
		if ((param.Equals("image") || param.Equals("lastedit")) && paramValue == "") {
			// empty image & lastedit attributes are unnecessary
			continue;
		}
		if (listingType.Equals("sleep") && param.Equals("hours") && paramValue == "") {
			// sleep listings don't use the "hours" attribute
			if (paramValue == "") {
				templateCall = Tools.RemoveTemplateParameter(templateCall, param);
			}
			continue;
		}
		if (!listingType.Equals("sleep") && (param.Equals("checkin") || param.Equals("checkout"))) {
			// only sleep listings use the "checkin" and "checkout" attributes
			if (paramValue == "") {
				templateCall = Tools.RemoveTemplateParameter(templateCall, param);
			}
			continue;
		}
		if (param.Equals("alt") && paramValue == "" && !ListingAltParamRegex.IsMatch(templateCall)) {
			// do not add an alt tag if it isn't already present
			continue;
		}
		// replace excess whitespace with single spaces
		paramValue = DoubleSpaceRegex.Replace(paramValue, " ");
		formattedValue += "| " + param + "=" + paramValue;
		// add either a newline or a space after the param value, depending on param and template type
		if (param.Equals("email") || param.Equals("directions") || param.Equals("fax") || param.Equals("price") || param.Equals("image") || param.Equals("lastedit")) {
			formattedValue += "\n";
		} else {
			formattedValue += " ";
		}
		templateCall = Tools.RemoveTemplateParameter(templateCall, param);
	}
	// loop through any unexpected template args and format on their own lines
	int remainingTemplateArgs = Tools.GetTemplateArgumentCount(templateCall);
	foreach(KeyValuePair<string, string> templateParameter in Tools.GetTemplateParameterValues(templateCall)) {
		string param = templateParameter.Key;
		if (param.Equals("content")) {
			continue;
		}
		string paramValue = templateParameter.Value;
		if (paramValue == "") {
			// any empty, unrecognized listing params can be removed
			continue;
		}
		
formattedValue += "| " + param + "=" + paramValue + "\n"; 	} 	// now add template content on its own line 	formattedValue += "| content=" + Tools.GetTemplateParameterValue(templateCall, "content") + "\n"; 	formattedValue += "}}"; 	if (!originalTemplateCall.Equals(formattedValue)) { 		// suppress edit summary for now - it is being applied to any article with listings 		// whether they are updated or not 		// summary = UpdateEditSummary(summary, "format listing per [[WV:Listings]]"); 	} 	return formattedValue; } // update the listing phone number fields as much as possible to match // Wikivoyage:Phone numbers private string SanitizeListingPhoneNumbers(string templateCall, ref string summary) { 	string originalTemplateCall = templateCall; 	// loop through expected template arguments and format appropriately 	foreach(string param in ListingPhoneParamNames) { 		string paramValue = Tools.GetTemplateParameterValue(templateCall, param); 		paramValue = SanitizePhoneNumber(paramValue); 		templateCall = Tools.UpdateTemplateParameterValue(templateCall, param, paramValue); 	} 	if (!templateCall.Equals(originalTemplateCall)) { 		summary = UpdateEditSummary(summary, "format listing phone # per [[WV:Phone numbers]]"); 	} 	return templateCall; } // update the listing email field to ensure it is valid private string SanitizeListingEmail(string templateCall, ref string summary) { 	string originalTemplateCall = templateCall; 	string email = Tools.GetTemplateParameterValue(templateCall, "email"); 	email = MailtoRegex.Replace(email, "").Trim(); 	templateCall = Tools.UpdateTemplateParameterValue(templateCall, "email", email); 	if (!templateCall.Equals(originalTemplateCall)) { 		summary = UpdateEditSummary(summary, "fix invalid listing email"); 	} 	return templateCall; } // trim lat/long precision to no more than eight digits private string SanitizeListingLatLong(string templateCall, ref string summary) { 	string originalTemplateCall = templateCall; 	string latitude = 
Tools.GetTemplateParameterValue(templateCall, "lat"); 	latitude = LatLongTrimRegex.Replace(latitude, "$1"); 	templateCall = Tools.UpdateTemplateParameterValue(templateCall, "lat", latitude); 	string longitude = Tools.GetTemplateParameterValue(templateCall, "long"); 	longitude = LatLongTrimRegex.Replace(longitude, "$1"); 	templateCall = Tools.UpdateTemplateParameterValue(templateCall, "long", longitude); 	if (!templateCall.Equals(originalTemplateCall)) { 		summary = UpdateEditSummary(summary, "trim lat/long precision"); 	} 	return templateCall; } // update the listing URL field to ensure it is valid private string SanitizeListingUrl(string templateCall, ref string summary) { 	string originalTemplateCall = templateCall; 	string url = Tools.GetTemplateParameterValue(templateCall, "url"); 	url = FootnoteUrlRegex.Replace(url, "$1"); 	Match match = ValidUrlRegex.Match(url); 	if (match.Success && !url.ToLower().StartsWith("http://") && !url.ToLower().StartsWith("https://") && !url.ToLower().StartsWith("//")) { 		url = "http://" + url; 	} 	templateCall = Tools.UpdateTemplateParameterValue(templateCall, "url", url); 	if (!templateCall.Equals(originalTemplateCall)) { 		summary = UpdateEditSummary(summary, "fix invalid listing URL"); 	} 	return templateCall; } // make sure the listing content is capitalized, isn't just punctuation, etc. 
// (template-call overload: reads the "content" param, sanitizes it and
// writes it back; it does not add anything to the edit summary)
private string SanitizeListingContent(string templateCall, ref string summary) {
	string content = Tools.GetTemplateParameterValue(templateCall, "content");
	return Tools.UpdateTemplateParameterValue(templateCall, "content", SanitizeListingContent(content));
}

// Sanitize a listing's free-text content: remove anything matched by
// EmptyPunctuationRegex, drop a leading "is ", capitalize the first
// character, and return "" if OnlyPunctuationRegex matches the result.
// Null or empty input is returned unchanged.
private string SanitizeListingContent(string content) {
	if (String.IsNullOrEmpty(content)) {
		return content;
	}
	// strip empty punctuation
	content = EmptyPunctuationRegex.Replace(content, "");
	// strip sentence fragments left over from text-to-listing conversions
	// (ordinal comparison so the result does not depend on the UI culture)
	if (content.StartsWith("is ", StringComparison.OrdinalIgnoreCase)) {
		content = content.Substring("is ".Length);
	}
	// make sure first character is capitalized
	content = Capitalize(content);
	// if only punctuation is left, remove everything
	return OnlyPunctuationRegex.IsMatch(content) ? "" : content;
}

// update the listing address field to ensure it is valid.
// - if the address matches ListingAddressIsDirectionsRegex and the listing has
//   no directions yet, the address is moved to the "directions" param
// - otherwise a trailing state/zip and a trailing city name (taken from the
//   article name) are removed and a trailing street type is abbreviated
private string SanitizeListingAddress(string templateCall, string articleName, ref string summary) {
	string address = Tools.GetTemplateParameterValue(templateCall, "address");
	if (String.IsNullOrEmpty(address)) {
		return templateCall;
	}

	if (ListingAddressIsDirectionsRegex.IsMatch(address)) {
		// the address field belongs in the directions field
		string directions = Tools.GetTemplateParameterValue(templateCall, "directions");
		if (String.IsNullOrEmpty(directions)) {
			string moved = Tools.UpdateTemplateParameterValue(templateCall, "directions", address);
			moved = Tools.UpdateTemplateParameterValue(moved, "address", "");
			summary = UpdateEditSummary(summary, "listing address moved to directions");
			return moved;
		}
		// directions already present: fall through and format the address as usual
	}

	// strip out state and zip code if present
	address = AddressWithStateOrZipRegex.Replace(address, "$1");
	address = StripStrayPunctuation(address);

	// strip out the city if it is present
	string city = GetCityFromArticleName(articleName);
	// an empty city would "match" every address, so it is skipped explicitly
	if (!String.IsNullOrEmpty(city) && address.EndsWith(city, StringComparison.OrdinalIgnoreCase)) {
		int cityStart = address.Length - city.Length;
		// only strip a whole word: "Newport" must not lose "port" in the article "Port"
		if (cityStart == 0 || !Char.IsLetterOrDigit(address[cityStart - 1])) {
			address = address.Substring(0, cityStart).Trim();
		}
	}
	address = StripStrayPunctuation(address);

	// properly abbreviate street name
	address = AbbreviateStreeType(address, "Avenue", "Ave");
	address = AbbreviateStreeType(address, "Boulevard", "Blvd");
	address = AbbreviateStreeType(address, "Court", "Ct");
	address = AbbreviateStreeType(address, "Drive", "Dr");
	address = AbbreviateStreeType(address, "Lane", "Ln");
	address = AbbreviateStreeType(address, "Place", "Pl");
	address = AbbreviateStreeType(address, "Road", "Rd");
	address = AbbreviateStreeType(address, "Street", "St");

	string updatedCall = Tools.UpdateTemplateParameterValue(templateCall, "address", address);
	if (!updatedCall.Equals(templateCall)) {
		summary = UpdateEditSummary(summary, "format listing address per [[WV:Listings]]");
	}
	return updatedCall;
}

// make sure that the listing type matches the section in which the listing
// is found (example: "see" listings in the "See" section)
private string MatchListingTypeToSection(string templateCall, string sectionName, ref string summary) {
	if (sectionName.Equals("eat and drink", StringComparison.OrdinalIgnoreCase)
			|| sectionName.Equals("see and do", StringComparison.OrdinalIgnoreCase)) {
		// skip these "combined" sections
		return templateCall;
	}
	string expectedListingType = GetValidListingTypeForSection(sectionName);
	if (Tools.GetTemplateName(templateCall).Equals(expectedListingType)) {
		// already the right type
		return templateCall;
	}
	string renamed = Tools.RenameTemplate(templateCall, expectedListingType, false);
	if (!renamed.Equals(templateCall)) {
		summary = UpdateEditSummary(summary, "update listing type to match expected section type");
	}
	return renamed;
}

// return a map of section name-section content for all second level headings.
// the opening text of the article is returned without a section name
private static List<KeyValuePair<string, string>> SplitToSecondLevelSections(string articleContent) {
	return SplitToHeadingSections(articleContent, WikiRegexes.HeadingLevelTwo);
}

// same as SplitToSecondLevelSections, but splits on WikiRegexes.HeadingLevelThree
private static List<KeyValuePair<string, string>> SplitToThirdLevelSections(string articleContent) {
	return SplitToHeadingSections(articleContent, WikiRegexes.HeadingLevelThree);
}

// Split the text at every match of headingRegex.  Each entry is
// (section name, section text): the name is the trimmed first group of the
// heading match ("" for text before the first heading) and the text runs
// from the heading itself up to the next heading.  The section texts are
// contiguous, so concatenating them gives back articleContent.
private static List<KeyValuePair<string, string>> SplitToHeadingSections(string articleContent, Regex headingRegex) {
	List<KeyValuePair<string, string>> sections = new List<KeyValuePair<string, string>>();
	int sectionStart = 0;
	string sectionName = "";
	foreach (Match heading in headingRegex.Matches(articleContent)) {
		if (heading.Index > 0) {
			// Don't add empty first section if page starts with heading
			string sectionContent = articleContent.Substring(sectionStart, heading.Index - sectionStart);
			sections.Add(new KeyValuePair<string, string>(sectionName, sectionContent));
		}
		sectionStart = heading.Index;
		// trimmed for every heading, including the last one, so that
		// "== Sleep ==" is always reported as "Sleep"
		sectionName = heading.Groups[1].Value.Trim();
	}
	// Add text of final section
	sections.Add(new KeyValuePair<string, string>(sectionName, articleContent.Substring(sectionStart)));
	return sections;
}

// return true if the text consists of a second level heading and nothing else
private static bool IsEmptySecondLevelSection(string sectionContent) {
	return IsEmptySection(sectionContent, WikiRegexes.HeadingLevelTwo);
}

// return true if the text consists of a third level heading and nothing else
private static bool IsEmptyThirdLevelSection(string sectionContent) {
	return IsEmptySection(sectionContent, WikiRegexes.HeadingLevelThree);
}

// return true if the section text starts with a heading and, ignoring
// surrounding whitespace, the heading is as long as the whole text
private static bool IsEmptySection(string sectionContent, Regex headingRegex) {
	Match heading = headingRegex.Match(sectionContent);
	if (!heading.Success || heading.Index != 0) {
		// this shouldn't happen, but just in case
		return false;
	}
	return heading.Value.Trim().Length == sectionContent.Trim().Length;
}

// return the expected listing type for the given section name ("See" returns "see").
// any section without a specific type returns the generic "listing".
private static string GetValidListingTypeForSection(string sectionName) {
	switch (sectionName) {
		case "See":
			return "see";
		case "Do":
			return "do";
		case "Buy":
			return "buy";
		case "Eat":
			return "eat";
		case "Drink":
			return "drink";
		case "Sleep":
			return "sleep";
	}
	// the combined section name is matched regardless of case
	if (sectionName.Equals("eat and drink", StringComparison.OrdinalIgnoreCase)) {
		return "eat";
	}
	return "listing";
}

// Reformat the first phone number found in the text and return the text
// with that number replaced.  Text without a PhoneNumberRegex match
// (including null or empty text) is returned unchanged.
private static string SanitizePhoneNumber(string phoneNumberText) {
	if (String.IsNullOrEmpty(phoneNumberText)) {
		// Regex.Match would throw on null
		return phoneNumberText;
	}
	// strip out the phone number, in case it is contained within other text
	// such as "888-888-8888 (front office)"
	Match match = PhoneNumberRegex.Match(phoneNumberText);
	if (!match.Success) {
		return phoneNumberText;
	}
	string originalPhoneNumber = match.Value;
	string phoneNumber = originalPhoneNumber;
	// remove invalid characters
	phoneNumber = phoneNumber.Replace("'", "");
	phoneNumber = phoneNumber.Replace("(", " ").Trim();
	phoneNumber = phoneNumber.Replace(")", " ").Trim();
	// convert periods to dashes
	phoneNumber = phoneNumber.Replace(".", "-");
	phoneNumber = ExcessWhitespaceRegex.Replace(phoneNumber, " ");
	// if there is a pattern like " -", "- ", "+ " left, replace the space
	phoneNumber = phoneNumber.Replace(" -", "-");
	phoneNumber = phoneNumber.Replace("- ", "-");
	phoneNumber = phoneNumber.Replace("+ ", "+");
	// if the phone number starts with a 1, change it to +1
	if (phoneNumber.StartsWith("1 ", StringComparison.Ordinal) || phoneNumber.StartsWith("1-", StringComparison.Ordinal)) {
		phoneNumber = "+" + phoneNumber;
	}
	return phoneNumberText.Replace(originalPhoneNumber, phoneNumber);
}

// return the city from the article name.  if the article name is "Foo (Disambiguation)"
// then this method returns "Foo".
private static string GetCityFromArticleName(string articleName) {
	string basePageName = Tools.BasePageName(articleName);
	Match match = CityNameWithDisambiguationRegex.Match(basePageName);
	if (match.Success) {
		return match.Groups[1].Value;
	}
	return basePageName;
}

// if the address ends in a full street type value, convert to the abbreviated value
// (the street type must be preceded by a space; the comparison ignores case)
private static string AbbreviateStreeType(string address, string invalidStreetType, string validStreetType) {
	// ordinal comparison: a culture-sensitive ToLower() breaks e.g. "DRIVE" under Turkish casing rules
	if (address.EndsWith(" " + invalidStreetType, StringComparison.OrdinalIgnoreCase)) {
		int typeStart = address.Length - invalidStreetType.Length;
		address = address.Substring(0, typeStart) + validStreetType;
	}
	return address;
}

// remove any leading or trailing punctuation
private static string StripStrayPunctuation(string text) {
	return StripPunctuation(text, true, true);
}

// remove any leading punctuation
private static string StripLeadingPunctuation(string text) {
	return StripPunctuation(text, true, false);
}

// remove any trailing punctuation
private static string StripTrailingPunctuation(string text) {
	return StripPunctuation(text, false, true);
}

// remove trailing and/or leading punctuation, as selected by the two flags;
// trailing punctuation is removed first
private static string StripPunctuation(string text, bool stripLeading, bool stripTrailing) {
	string stripped = text;
	if (stripTrailing) {
		stripped = InvalidTrailingPunctuationRegex.Replace(stripped, "");
	}
	if (stripLeading) {
		stripped = InvalidLeadingPunctuationRegex.Replace(stripped, "");
	}
	return stripped;
}

// return true if the text
// matches the pattern, otherwise return false.  the "match"
// param will be populated with the match object
private static bool MatchText(string matchText, Regex regex, ref Match match) {
	match = regex.Match(matchText);
	return match.Success;
}

// return the text with its first character upper-cased; null or empty text
// is returned unchanged
private static string Capitalize(string text) {
	if (String.IsNullOrEmpty(text)) {
		return text;
	}
	// Substring(1) is "" for one-character text, so no special case is needed
	return Char.ToUpper(text[0]) + text.Substring(1);
}

// add the value to the edit summary if it is not already present.
// entries are separated by ", "; a null or empty summary becomes just the
// new value.
private static string UpdateEditSummary(string summary, string textToAdd) {
	if (String.IsNullOrEmpty(summary)) {
		return textToAdd;
	}
	foreach (string summaryField in summary.Split(',')) {
		if (summaryField.Trim().Equals(textToAdd)) {
			// text already present in edit summary
			return summary;
		}
	}
	return summary + ", " + textToAdd;
}

// append the value to the existing list as a CSV (no duplicate check)
private static string AppendCSV(string currentList, string valueToAdd) {
	if (String.IsNullOrEmpty(currentList)) {
		return valueToAdd;
	}
	return currentList + ", " + valueToAdd;
}

// return true if the article contains a city status template
private static bool IsCityArticle(string articleText) {
	return CityStatusTemplateNamesRegex.IsMatch(articleText);
}

// return true if the article contains a country status template
private static bool IsCountryArticle(string articleText) {
	return CountryStatusTemplateNamesRegex.IsMatch(articleText);
}

// return true if the article contains a dive guide status template
private static bool IsDiveguideArticle(string articleText) {
	return DiveguideStatusTemplateNamesRegex.IsMatch(articleText);
}

// return true if the article contains a district status template
private static bool IsDistrictArticle(string articleText) {
	return DistrictStatusTemplateNamesRegex.IsMatch(articleText);
}

// return true if the article contains an itinerary status template
private static bool IsItineraryArticle(string articleText) {
	return ItineraryStatusTemplateNamesRegex.IsMatch(articleText);
}

// return true if the article contains a park status template
private static bool IsParkArticle(string articleText) {
	return ParkStatusTemplateNamesRegex.IsMatch(articleText);
}

// return true if the article contains a phrasebook status template
private static bool IsPhrasebookArticle(string articleText) {
	return PhrasebookStatusTemplateNamesRegex.IsMatch(articleText);
}

// return true if the article contains a region status template
private static bool IsRegionArticle(string articleText) {
	return RegionStatusTemplateNamesRegex.IsMatch(articleText);
}

// return true if the article contains a topic status template
private static bool IsTopicArticle(string articleText) {
	return TopicStatusTemplateNamesRegex.IsMatch(articleText);
}

// TODO:
// - move tollfree numbers to tollfree in listings
// - don't allow "otheruses" to be moved above the page banner