User:Rjwilmsi/genfixes
Appearance
// Text hider shared by the module. ProcessArticleInternal passes the article text
// through ht.Hide(...) ("mask images & headings") before it runs its regex fixes.
// Signature noted by the original author for reference:
// HideMore(string text, bool HideExternalLinks, bool LeaveMetaHeadings, bool HideImages)
// NOTE(review): the three constructor arguments below presumably correspond to
// HideExternalLinks, LeaveMetaHeadings and HideImages, in that order - confirm
// against the WikiFunctions.Parse.HideText constructor.
WikiFunctions.Parse.HideText ht = new WikiFunctions.Parse.HideText(false, false, true);
// Entry point: runs ProcessArticleInternal over the article repeatedly until a pass
// leaves the text unchanged, or until MAX_ITERATIONS passes have been made.
//
// ArticleText   - text to process; the text produced by the final pass is returned.
// ArticleTitle  - passed through to ProcessArticleInternal and used in the log entry.
// wikiNamespace - passed through unchanged to ProcessArticleInternal.
// Summary (out) - concatenation of the non-empty summaries returned by each pass.
// Skip (out)    - the skip flag reported by the FIRST pass only.
public string ProcessArticle(string ArticleText, string ArticleTitle, int wikiNamespace, out string Summary, out bool Skip)
{
    // should NEVER get into infinite loop but limit iterations just in case
    const int MAX_ITERATIONS = 5;

    string Summary2 = "";
    bool Skip2 = false;

    Skip = false;
    Summary = "";

    for(int i = 0; i < MAX_ITERATIONS; i++)
    {
        string Before = ArticleText;
        ArticleText = ProcessArticleInternal(ArticleText, ArticleTitle, wikiNamespace, out Summary2, out Skip2);

        // Accumulate the per-pass summaries; a null or empty summary adds nothing.
        if(!string.IsNullOrEmpty(Summary2))
        {
            Summary += Summary2;
        }

        // Only the first pass decides Skip: per the original author's note,
        // on later passes Skip2 will come back true.
        if(i == 0)
        {
            Skip = Skip2;
        }

        // Converged: this pass changed nothing, so stop. This is checked BEFORE the
        // max-iterations log so that an article which stabilises on the final
        // permitted pass is not reported as having hit the cap.
        if(string.Equals(Before, ArticleText))
        {
            break;
        }

        // Still changing after the last permitted pass: record it for investigation.
        if(i == (MAX_ITERATIONS-1))
        {
            LogToFile("[[" + ArticleTitle + "]]" + "@@@reached max iterations");
        }
    }

    return ArticleText;
}
public string ProcessArticleInternal(string ArticleText, string ArticleTitle, int wikiNamespace, out string Summary, out bool Skip)
{
Skip = false;
Summary = "" ;
// String to check whether to request skip
string OriginalArticleText = ArticleText;
// configuration variables
bool APPLY_AMBIGUOUS_FIXES = false;
bool REPORT_MODE = !awb.AutoTagCheckBox.Checked; // set report mode by disabling auto tagging
const int BRACKET_ON_THREAD_TIMEOUT = 2000;
bool FIXING_TYPOS = awb.RegexTypoFix.Checked; // TODO: test this in checked mode
// to determine edit summary
string ArticleTextBeforeOfDate = "";
string ArticleTextBeforeDelinkingDate = "";
string ArticleTextBeforeLanguageTemplate = "";
string ArticleTextBeforeDOBDOD = "";
string ArticleTextBeforeURL = "";
string ArticleTextBeforeCiteTemplate = "";
string ArticleTextBeforeDuplicateCiteField = "";
string ArticleTextBeforeDEFAULTSORT = "";
string ArticleTextBeforeGuessedDate = "";
string ArticleTextBeforeDayMonthYear = "";
string ArticleTextBeforeReportMode = "";
string ArticleTextBeforeRef = "";
// to determine edit summary counts
int VisibleFixesCheckpoint = 0;
int SpecificFixes = 0;
bool DayMonthYearFixApplied = false;
// for setting Date format
string ArticleLocaleFound = "";
// to determine if visible fixes made, logging
int VisibleFixes = 0;
bool ArticleLogged = false;
if(REPORT_MODE)
APPLY_AMBIGUOUS_FIXES = true;
// mask images & headings
ArticleText = ht.Hide(ArticleText);
// imported date-delinking find & replaces
ArticleTextBeforeDelinkingDate = ArticleText;
// genfixes
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({\s*\|\s*class\s*=\s*""wikitable[^}]*?)cel(?:lpa|pad?)ding\b", "$1cellpadding"); // cellpadding typo
// date range fixes
//VisibleFixes += RegexReplace(ref ArticleText, @"(?i){{\s*cit[^{}]*\|\s*year\s*=\s*(\[\[)?(\d\d\s*)?(?:January|February|March|April|May|June|July|August|September|October|November|December)", "$1date$2");
//VisibleFixes += RegexReplace(ref ArticleText, @"(?i)(\[\[(?:January|February|March|April|May|June|July|August|September|October|November|December)\s*(?:[1-3]?\d)\]\])\s*(?:[^\d]|&.dash;)\s*(\[\[(?:January|February|March|April|May|June|July|August|September|October|November|December)\s*(?:[1-3]?\d)\]\])(,?)\s*(\[\[\d{3,4}\]\])", "$1$3 $4 – $2$3 $4"); // date range fix Am full wikilinked
//VisibleFixes += RegexReplace(ref ArticleText, @"(?i)(\[\[(?:[1-3]?\d)\s*(?:January|February|March|April|May|June|July|August|September|October|November|December)\]\])\s*(?:[^\d]|&.dash;)\s*(\[\[(?:[1-3]?\d)\s*(?:January|February|March|April|May|June|July|August|September|October|November|December)\]\])(,?)\s*(\[\[\d{3,4}\]\])", "$1$3 $4 – $2$3 $4"); // date range fix Int full wikilink
VisibleFixes += RegexReplace(ref ArticleText, @"(?i)\b(January|February|March|April|May|June|July|August|September|October|November|December)\s*([1-3]?\d)\s*(?:[^\d]|&.dash;)([1-3]?\d,?)\s+\[\[(\d{3,4})\]\]", "$1 $2–$3 $4"); // date range fix Am
VisibleFixes += RegexReplace(ref ArticleText, @"(?i)\b([1-3]?\d)\s*(?:[^\d]|&.dash;)([1-3]?\d)\s+(January|February|March|April|May|June|July|August|September|October|November|December),?\s*\[\[(\d{3,4})\]\]", "$1–$2 $3 $4"); // date range fix Int
// fix [[link}] and [{link2]]
VisibleFixes += RegexReplace(ref ArticleText, @"([^\[\]{}])\[{([^\[\]{}]+\]\])([^\[\]{}])", "$1[[$2$3");
VisibleFixes += RegexReplace(ref ArticleText, @"([^\[\]{}])(\[\[[^\[\]{}]+)}\]([^\[\]{}])", "$1$2]]$3");
// template brackets, fixes {{{template}} and {{template}}}
if(!Regex.IsMatch(ArticleText, @"(?si)(\{\{#if.*?(\{\{\{|\}\}\})|\{\{\{num|num\|\d\}\}\})"))
{
VisibleFixes += RegexReplace(ref ArticleText, @"([^\{\}])\{\{\{([^\{\}!]+)\}\}([^\{\}])", "$1{{$2}}$3");
VisibleFixes += RegexReplace(ref ArticleText, @"([^\{\}])\{\{([^\{\}!]+)\}\}\}([^\{\}])", "$1{{$2}}$3");
}
// cite template fixes
ArticleTextBeforeCiteTemplate = ArticleText;
VisibleFixesCheckpoint = VisibleFixes;
// if date is ambiguous between American and British format, will default to American
ArticleLocaleFound = DetermineArticleDateLocale(ArticleText);
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(<ref[^\>\<\{\}]*?>\s*\{\{\s*cit[^\{\}]*?)(?:\)\)\s*)?(</ref>)", "$1}}$2"); // fix when citations have no closing }}, or )) instead
if(!Regex.IsMatch(ArticleText, @"(?si)\{\{\s*cite\s*(press\s+release\s+v2|web\s+APA)"))
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cite\s+(?:web|news|press\s+release|journal|magazine))(?:(\s+)|(\s*)\\(\s*))(\w+)", "$1$3|$2$4$5"); // fix when cite templates have no | at start e.g. {{ cite web url=... or {{ cite web\url=...
// (part) wikilinked/external linked URL in cite template, uses MediaWiki regex of [^\[\]<>""\s] for URL bit after http://
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cite\s+[^{}]*\|\s*url\s*=\s*)\[+\s*((?:http://)?[^\[\]<>""\s]+?\s*)\]?(\s*(?:\||}}))", "$1$2$3");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cite\s+[^{}]*\|\s*url\s*=\s*)\[?\s*((?:http://)?[^\[\]<>""\s]+?\s*)\]+(\s*(?:\||}}))", "$1$2$3");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*?\|\s*)(?:fprmat)(\s*=\s*)", "$1format$2"); // Changes 'fprmat' typo to format
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*?\|\s*)(?:\s*date\s*)?(?:retrieved(?:\s+on)?|(?:last)?accessed|access\s+date)(\s*=\s*)", "$1accessdate$2"); // Changes non-existent retrieved field to accessdate
VisibleFixes += RegexReplace(ref ArticleText, @"(?s)Accessdate", "accessdate", false);
VisibleFixes += RegexReplace(ref ArticleText, @"(?i)\ba(?:cess?s?|ccc?es|ccesss|cccess)date\b", "accessdate");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|\s*(?:(?:archive|air|access)?date2?|accessdaymonth|accessmonthday)\s*=\s*\[*(?:January|February|March|April|May|June|July|August|September|October|November|December)\s*[0-3]?\d(?:\s*\]+)?)\s*\.\s*(\[*\s*\d{4}\s*\]*)", "$1, $2"); // 'date=January 9. 2008' to 'date=January 9, 2008'
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|\s*(?:(?:archive|air|access)?date2?|accessdaymonth|accessmonthday)\s*=\s*\[*\s*[0-3]?\d\s*(?:January|February|March|April|May|June|July|August|September|October|November|December)(?:\s*\]+)?)\s*[\.,]+(\s*\[*\s*\d{4}\s*\]*)", "$1$2", false); // 'date=9 January, 2008' to 'date=9 January 2008'
// date = YYYY-Month-DD fix
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\d{4})[-/\s]Apr(?:il|\.)?[-/\s]([0-3]?\d\s*(?:\||}}))", "$1-04-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\d{4})[-/\s]Aug(?:ust|\.)?[-/\s]([0-3]?\d\s*(?:\||}}))", "$1-08-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\d{4})[-/\s]Dec(?:ember|\.)?[-/\s]([0-3]?\d\s*(?:\||}}))", "$1-12-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\d{4})[-/\s]Feb(?:r?uary|\.)?[-/\s]([0-3]?\d\s*(?:\||}}))", "$1-02-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\d{4})[-/\s]Jan(?:uary|\.)?[-/\s]([0-3]?\d\s*(?:\||}}))", "$1-01-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\d{4})[-/\s]Jul(?:y|\.)?[-/\s]([0-3]?\d\s*(?:\||}}))", "$1-07-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\d{4})[-/\s]Jun(?:e|\.)?[-/\s]([0-3]?\d\s*(?:\||}}))", "$1-06-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\d{4})[-/\s]Mar(?:ch|\.)?[-/\s]([0-3]?\d\s*(?:\||}}))", "$1-03-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\d{4})[-/\s]Nov(?:ember|\.)?[-/\s]([0-3]?\d\s*(?:\||}}))", "$1-11-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\d{4})[-/\s]Oct(?:ober|\.)?[-/\s]([0-3]?\d\s*(?:\||}}))", "$1-10-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\d{4})[-/\s]Sep(?:tember|\.)?[-/\s]([0-3]?\d\s*(?:\||}}))", "$1-09-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\d{4})[-/\s]May\.?[-/\s]([0-3]?\d\s*(?:\||}}))", "$1-05-$2");
// cite month=Mon or month=05 fix
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*month\s*=\s*)(?:Apr\.?|0?4)(\s*(?:\||}}))", "$1April$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*month\s*=\s*)(?:Aug\.?|0?8)(\s*(?:\||}}))", "$1August$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*month\s*=\s*)(?:Dec\.?|12)(\s*(?:\||}}))", "$1December$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*month\s*=\s*)(?:Feb\.?|0?2)(\s*(?:\||}}))", "$1February$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*month\s*=\s*)(?:Jan\.?|0?1)(\s*(?:\||}}))", "$1January$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*month\s*=\s*)(?:Jul\.?|0?7)(\s*(?:\||}}))", "$1July$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*month\s*=\s*)(?:Jun\.?|0?6)(\s*(?:\||}}))", "$1June$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*month\s*=\s*)(?:Mar\.?|0?3)(\s*(?:\||}}))", "$1March$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*month\s*=\s*)(?:Nov\.?|11)(\s*(?:\||}}))", "$1November$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*month\s*=\s*)(?:Oct\.?|10)(\s*(?:\||}}))", "$1October$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*month\s*=\s*)(?:Sep(?:t|\.)?|0?9)(\s*(?:\||}}))", "$1September$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*month\s*=\s*)0?5(\s*(?:\||}}))", "$1May$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?\s*=)\s*Page\s+last\s+updated\s+at\s+[0-2]\d:[0-5]\d\s*\w{3},\s*", "$1"); // remove Page last updated at ... from (BBC) date references
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?\s*=\s*)(?:(?:Mon|Tues|Wednes|Thurs|Fri|Satur|Sun)day\s*,?|(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s*[,\.]*\s|retrieved(?:\s+on)?|accessed)\s*", "$1"); // remove day of week/"retrieved" from date field
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*)year(\s*=\s*(?:\[\[)?(?:\d\d\s*)?(?:January|February|March|April|May|June|July|August|September|October|November|December))", "$1date$2", false); // year to date when value contains month
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?(?:\s*=\s*)(?:(?:January|February|March|April|May|June|July|August|September|October|November|December)\s*[0-3]?\d|[0-3]?\d\s*(?:January|February|March|April|May|June|July|August|September|October|November|December))[,\.]?\s+)'?(0\d\s*(?:\||\}\}))", "${1}20$2"); // 'DD Month YY' to YYYY fix (2000+)
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?(?:\s*=\s*)(?:(?:January|February|March|April|May|June|July|August|September|October|November|December)\s*[0-3]?\d|[0-3]?\d\s*(?:January|February|March|April|May|June|July|August|September|October|November|December))[,\.]?\s+)'?([4-9]\d\s*(?:\||\}\}))", "${1}19$2"); // 'DD Month YY' to YYYY fix (1940+)
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*)date(\s*=\s*(?:\[\[)?(?:\d?\d\s*)?(?:January|February|March|April|May|June|July|August|September|October|November|December)(?:\s*\d?\d,?)?)(?:\s*\|)\s*year\s*=\s*(\d{4})(\s*(?:\||}}))", "$1date$2 $3$4"); // date and year combiner
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*)year\s*=\s*(\d{4})\s*\|\s*date(\s*=\s*(?:\[\[)?(?:\d?\d\s*)?(?:January|February|March|April|May|June|July|August|September|October|November|December)(?:\s*\d?\d,?)?)(\s*(?:\||}}))", "$1date$3 $2$4"); // date and year combiner 2 of 2
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*)(?:date|month)(\s*=\s*[12]\d{3}\s*(?:\||}))", "$1year$2", false); // date=YYYY or month=YYYY to year fix
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*)year(\s*=\s*\[\[(?:January|February|March|April|May|June|July|August|September|October|November|December)\s*[0-3]?\d\]?\]?,?\s*\[?\[?(\d{4})\]?\]?)", "$1date$2"); // Fixes wikilinked year field to date (American)
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*)year(\s*=\s*\[?\[?[0-3]?\d\s*(?:January|February|March|April|May|June|July|August|September|October|November|December)\]?\]?,?\s*\[?\[?\d{4}\]?\]?)", "$1date$2"); // Fixes wikilinked year field to date (International)
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?\s*=\s*)([0-3]?\d)-((?:January|February|March|April|May|June|July|August|September|October|November|December),?)-(\d{2,4})", "$1$2 $3 $4"); // convert dashed dates to spaces, allows YY
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?\s*=\s*)(January|February|March|April|May|June|July|August|September|October|November|December)-([0-3]?\d,?)-(\d{2,4})", "$1$2 $3 $4"); // convert dashed dates to spaces, allows YY
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cite\s*(?:news[^g]|paper|press|episode|video)[^{}]*\|\s*accessdate\s*=\s*)(?:(January|February|March|April|May|June|July|August|September|October|November|December)\s+0?([1-3]?\d)|0?([1-3]?\d)\s*(January|February|March|April|May|June|July|August|September|October|November|December))(?:\s*\|)\s*accessyear\s*=\s*(200\d)(\s*(?:\||}}))", "$1$2 $3$4 $5 $6$7"); // accessyear and accessdate combiner (not for cite web as this displays correctly as-is)
VisibleFixes += LoopedRegexReplace(ref ArticleText, @"(?si)({{\s*cite\s+(?:web|news|press\s+release)[^{}]*\|\s*(?:access|archive)date\s*=\s*)\[\[(200\d|19[7-9]\d)-([0-1]?\d)-([0-3]?\d)\]\]", "$1$2-$3-$4"); // Fixes wikilinked ISO cite web accessdate, which displays with visible square brackets for web/news/pr templates
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cite\s+[^{}]*\|\s*(?:access|archive)date\s*=\s*)\[\[((?:January|February|March|April|May|June|July|August|September|October|November|December)\s*[0-3]?\d)\]\],?\s*\[\[(\d{4})\]\]", "$1$2 $3"); // wikilinked Am accessdate
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cite\s+[^{}]*\|\s*(?:access|archive)date\s*=\s*)\[\[([0-3]?\d\s*(?:January|February|March|April|May|June|July|August|September|October|November|December))\]\],?\s*\[\[(\d{4})\]\]", "$1$2 $3"); // wikilinked Int accessdate
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:(?:archive|air|access)?date2?|accessdaymonth|accessmonthday)\s*=[^}\|]*?)Jan\.?([^u])", "$1January$2"); // Mon to Month expander for January for all cite dates
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:(?:archive|air|access)?date2?|accessdaymonth|accessmonthday)\s*=[^}\|]*?)Feb\.?([^r])", "$1February$2"); // Mon to Month expander for February for all cite dates
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:(?:archive|air|access)?date2?|accessdaymonth|accessmonthday)\s*=[^}\|]*?)Mar\.?([^c])", "$1March$2"); // Mon to Month expander for March for all cite dates
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:(?:archive|air|access)?date2?|accessdaymonth|accessmonthday)\s*=[^}\|]*?)Apr\.?([^i])", "$1April$2"); // Mon to Month expander for April for all cite dates
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:(?:archive|air|access)?date2?|accessdaymonth|accessmonthday)\s*=[^}\|]*?)Jun\.?([^e])", "$1June$2"); // Mon to Month expander for June for all cite dates
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:(?:archive|air|access)?date2?|accessdaymonth|accessmonthday)\s*=[^}\|]*?)Jul\.?([^y])", "$1July$2"); // Mon to Month expander for July for all cite dates
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:(?:archive|air|access)?date2?|accessdaymonth|accessmonthday)\s*=[^}\|]*?)Aug\.?([^u])", "$1August$2"); // Mon to Month expander for August for all cite dates
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:(?:archive|air|access)?date2?|accessdaymonth|accessmonthday)\s*=[^}\|]*?)Sept?\.?([^te])", "$1September$2"); // Mon to Month expander for September for all cite dates
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:(?:archive|air|access)?date2?|accessdaymonth|accessmonthday)\s*=[^}\|]*?)Oct\.?([^o])", "$1October$2"); // Mon to Month expander for October for all cite dates
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:(?:archive|air|access)?date2?|accessdaymonth|accessmonthday)\s*=[^}\|]*?)Nov\.?([^e])", "$1November$2"); // Mon to Month expander for November for all cite dates
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:(?:archive|air|access)?date2?|accessdaymonth|accessmonthday)\s*=[^}\|]*?)Dec\.?([^e])", "$1December$2"); // Mon to Month expander for December for all cite dates
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*)(\[?\[?(?:January|February|March|April|May|June|July|August|September|October|November|December))[,\.]?\s*([0-3]?\d\]?\]?)(?:st|nd|rd|th)[,\.]?(\s*\[?\[?(?:200\d|19\d\d)\]?\]?)", "$1$2 $3,$4"); // removes ordinals, extra commas from American dates
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*)(\[?\[?[0-3]?\d)\s*(?:st|nd|rd|th)[,\.]?\s*((?:January|February|March|April|May|June|July|August|September|October|November|December)\]?\]?)[,\.]?\s*(\[?\[?(?:200\d|19\d\d)\]?\]?)", "$1$2 $3 $4"); // removes ordinals, extra commas from International dates
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:(?:archive|air|access)?date2?|accessdaymonth|accessmonthday\s*=\s*))(\[?\[?(?:January|February|March|April|May|June|July|August|September|October|November|December))[,\.]?\s*([0-3]?\d\]?\]?)\s*(?:st|nd|rd|th)" , "$1$2 $3"); // remove ordinals from 'Month Dth'
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:(?:archive|air|access)?date2?|accessdaymonth|accessmonthday\s*=\s*))(\[?\[?[0-3]?\d)\s*(?:st|nd|rd|th)\s*((?:January|February|March|April|May|June|July|August|September|October|November|December)\]?\]?)" , "$1$2 $3"); // remove ordinals from 'Dth Month'
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cite\s*(?:web|book|journal|magazine)[^{}]*\|\s*)date(\s*=\s*)(January|February|March|April|May|June|July|August|September|October|November|December),?\s*(\d{4})(\s*(?:\||}}))", "$1month$2$3 | year$2$4$5", false); // date to year and month, for templates where these fields exist
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cite\s*(?:web|book|journal|magazine)[^{}]*\|\s*)date(\s*=\s*)(\d{4})\s*(January|February|March|April|May|June|July|August|September|October|November|December)(\s*(?:\||}}))", "$1year$2$3 | month$2$4$5", false); // 'date=YYYY Month' to year and month, for templates where these fields exist
// date=DD month=Month year=YYYY fix (or fields in different order)
ArticleTextBeforeDayMonthYear = ArticleText;
if(ArticleLocaleFound.Equals("US"))
{
// date month year, date year month (US format)
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|\s*\bdate\s*=\s*)([0-3]?\d)\s*(\|[^{}]*?(?:\|\s*)?)\bmonth\s*=\s*(January|February|March|April|May|June|July|August|September|October|November|December)\s*\|([^{}]*)\byear\s*=\s*(\d{4})\s*(?:\||(\}\}))", "$1 $4 $2, $6 $3$5$7");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|\s*\bdate\s*=\s*)([0-3]?\d)\s*(\|[^{}]*?(?:\|\s*)?)\byear\s*=\s*(\d{4})\s*\|([^{}]*)\bmonth\s*=\s*(January|February|March|April|May|June|July|August|September|October|November|December)\s*(?:\||(\}\}))", "$1 $6 $2, $4 $3$5$7");
// month year date, month date year (US format)
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|\s*)\bmonth\s*=\s*(January|February|March|April|May|June|July|August|September|October|November|December)\s*(\|[^{}]*?(?:\|\s*)?)\byear\s*=\s*(\d{4})\s*\|([^{}]*\bdate\s*=\s*)([0-3]?\d)\s*(?:\||(\}\}))", "$1$5 $2 $6, $4 $3$7");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|\s*)\bmonth\s*=\s*(January|February|March|April|May|June|July|August|September|October|November|December)\s*\|([^{}]*?(?:\|\s*)?\bdate\s*=\s*)([0-3]?\d)\s*(\|[^{}]*)\byear\s*=\s*(\d{4})\s*(?:\||(\}\}))", "$1$3 $2 $4, $6 $5$7");
// year date month, year month date (US format)
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*)\|\s*\byear\s*=\s*(\d{4})\s*(\|[^{}]*?(?:\|\s*)?\bdate\s*=\s*)([0-3]?\d)(\s*\|[^{}]*)\bmonth\s*=\s*(January|February|March|April|May|June|July|August|September|October|November|December)\s*(?:\||(\}\}))", "$1$3 $6 $4, $2 $5$7");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|)\s*\byear\s*=\s*(\d{4})\s*(\|[^{}]*?(?:\|\s*)?)\bmonth\s*=\s*(January|February|March|April|May|June|July|August|September|October|November|December)\s*\|([^{}]*\bdate\s*=\s*)([0-3]?\d)\s*(?:\||(\}\}))", "$1$5 $4 $6, $2 $3$7");
}
else // Intl format
{
// date month year, date year month (International format)
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|\s*\bdate\s*=\s*[0-3]?\d)\s*(\|[^{}]*?(?:\|\s*)?)\bmonth\s*=\s*(January|February|March|April|May|June|July|August|September|October|November|December)\s*\|([^{}]*)\byear\s*=\s*(\d{4})\s*(?:\||(\}\}))", "$1 $3 $5 $2$4$6");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|\s*\bdate\s*=\s*[0-3]?\d)\s*(\|[^{}]*?(?:\|\s*)?)\byear\s*=\s*(\d{4})\s*\|([^{}]*)\bmonth\s*=\s*(January|February|March|April|May|June|July|August|September|October|November|December)\s*(?:\||(\}\}))", "$1 $5 $3 $2$4$6");
// month year date, month date year (International format)
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|\s*)\bmonth\s*=\s*(January|February|March|April|May|June|July|August|September|October|November|December)\s*(\|[^{}]*?(?:\|\s*)?)\byear\s*=\s*(\d{4})\s*\|([^{}]*\bdate\s*=\s*[0-3]?\d)\s*(?:\||(\}\}))", "$1$5 $2 $4 $3 $6");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|\s*)\bmonth\s*=\s*(January|February|March|April|May|June|July|August|September|October|November|December)\s*\|([^{}]*?(?:\|\s*)?\bdate\s*=\s*[0-3]?\d)\s*(\|[^{}]*)\byear\s*=\s*(\d{4})\s*(?:\||(\}\}))", "$1$3 $2 $5 $4 $6");
// year date month, year month date (International format)
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*)\|\s*\byear\s*=\s*(\d{4})\s*(\|[^{}]*?(?:\|\s*)?\bdate\s*=\s*[0-3]?\d)(\s*\|[^{}]*)\bmonth\s*=\s*(January|February|March|April|May|June|July|August|September|October|November|December)\s*(?:\||(\}\}))", "$1$3 $5 $2 $4$6");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*)\|\s*\byear\s*=\s*(\d{4})\s*(\|[^{}]*?(?:\|\s*)?)\bmonth\s*=\s*(January|February|March|April|May|June|July|August|September|October|November|December)\s*\|([^{}]*\bdate\s*=\s*[0-3]?\d)\s*(\||\}\})", "$1$3$5 $4 $2 $6");
}
if(!ArticleTextBeforeDayMonthYear.Equals(ArticleText))
DayMonthYearFixApplied=true;
//VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cite\s*(?:[ac-ikln-v][^{}]*)\|\s*)month(\s*=\s*)(January|February|March|April|May|June|July|August|September|October|November|December) \| year(?:\s*=\s*)(\d{4})(\s*(?:\||}}))", "$1date$2$3 $4$5"); // year and month to date, where these fields don't exist (do exist for web, book, journal
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?(?:\s*=\s*)(?:(?:January|February|March|April|May|June|July|August|September|October|November|December)\s*[0-3]?\d|[0-3]?\d\s*(?:January|February|March|April|May|June|July|August|September|October|November|December))[,\.]?\s+)'?(0\d\s*(?:\||\}\}))", "${1}20$2"); // 'DD Month YY' to YYYY fix (2000+)
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?(?:\s*=\s*)(?:(?:January|February|March|April|May|June|July|August|September|October|November|December)\s*[0-3]?\d|[0-3]?\d\s*(?:January|February|March|April|May|June|July|August|September|October|November|December))[,\.]?\s+)'?([4-9]\d\s*(?:\||\}\}))", "${1}19$2"); // 'DD Month YY' to YYYY fix (1940+)
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*[Cc]ite\s+[^{}]*\|\s*(?:archive|air|access)?date2?\s*=\s*)([0-3]?\d)-((?:January|February|March|April|May|June|July|August|September|October|November|December),?)-(\d{2,4})", "$1$2 $3 $4"); // convert dashed dates to spaces
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*[Cc]ite\s+[^{}]*\|\s*(?:archive|air|access)?date2?\s*=\s*)(January|February|March|April|May|June|July|August|September|October|November|December)-([0-3]?\d),?-(\d{2,4})", "$1$2 $3 $4"); // convert dashed dates to spaces
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|\s*)accessdate\s*=\s*([0-3]?\d)\s*\|([^{}]*?(?:\|\s*)?)((?:accessdaymonth|accessmonthday)\s*=\s*(?:January|February|March|April|May|June|July|August|September|October|November|December))(\s*(?:\||\}\}))" , "$1$3$4 $2$5"); // accessdaymonth|accessmonthday = Month | accessdate = D?D combiner, 1 of 2
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|\s*)((?:accessdaymonth|accessmonthday)\s*=\s*(?:January|February|March|April|May|June|July|August|September|October|November|December))(\s*\|[^{}]*?)(?:\|\s*)?accessdate\s*=\s*([0-3]?\d)\s*(\||\}\})" , "$1$2 $4$3$5"); // accessdaymonth|accessmonthday = Month | accessdate = D?D combiner, 2 of 2
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*[Cc]it[^{}]*\|\s*date\s*=\s*)(?:((?:January|February|March|April|May|June|July|August|September|October|November|December)\s*)0(\d,?)|0(\d\s+(?:January|February|March|April|May|June|July|August|September|October|November|December)))(\s+\d{2,4}\s*(?:\||}}))", "$1$2$3$4$5"); // removes leading zeros in days Am or Int dates
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|\s*(?:accessdaymonth|accessmonthday)\s*=\s*)(?:((?:January|February|March|April|May|June|July|August|September|October|November|December)\s*)0(\d,?)|0(\d\s+(?:January|February|March|April|May|June|July|August|September|October|November|December)))(\s*(?:\||\}\}))", "$1$2$3$4$5"); // removes leading zeros in days Am or Int Month + 0D, TODO scan & test
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*[Cc]ite\s*(?:news[^g]|paper|press|episode|video)[^{}]*\|\s*accessdate\s*=\s*)(?:(January|February|March|April|May|June|July|August|September|October|November|December)\s+0?([1-3]?\d)|0?([1-3]?\d)\s*(January|February|March|April|May|June|July|August|September|October|November|December))(?:\s*\|)\s*accessyear\s*=\s*(200\d)(\s*(?:\||}}))", "$1$2 $3$4 $5 $6$7"); // accessyear and accessdate combiner (not for cite web as this displays correctly as-is)
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|\s*access)monthday(\s*=\s*[0-3]?\d\s*(?:January|February|March|April|May|June|July|August|September|October|November|December)\s*(?:\||\}\}))" , "$1daymonth$2", false); // accessmonthday = Month D fix
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|\s*access)daymonth(\s*=\s*(?:January|February|March|April|May|June|July|August|September|October|November|December)\s*[0-3]?\d\s*(?:\||\}\}))" , "$1monthday$2", false); // accessdaymonth = D Month fix
// tidy up || or |}} (maybe with whitespace between) if one of above fixes introduced it
VisibleFixes += LoopedRegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*)\|\s*(\}\}|\|)", "$1$2", false);
// accessdate/archivedate only fixes
if(Regex.IsMatch(ArticleText, @"(?i)\b(access|archive)date\s*="))
{
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(1[0-2])[/_\-\.]?(1[3-9])[/_\-\.]?(?:20)?(0\d)\b", "${1}20$4-$2-$3");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(1[0-2])[/_\-\.]?([2-3]\d)[/_\-\.]?(?:20)?(0\d)\b", "${1}20$4-$2-$3");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(1[0-2])[/_\-\.]?\2[/_\-\.]?(?:20)?(0\d)\b", "${1}20$3-$2-$2"); // nn-nn-2004 and nn-nn-04 to ISO format (both nn the same)
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(1[3-9])[/_\-\.]?(1[0-2])[/_\-\.]?(?:20)?(0\d)\b", "${1}20$4-$3-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(1[3-9])[/_\-\.]?0?([1-9])[/_\-\.]?(?:20)?(0\d)\b", "${1}20$4-0$3-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(200\d)0?([01]\d)[/_\-\.]([0-3]\d\s*(?:\||}}))", "$1$2-$3-$4");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(200\d)[/_\-\.]([01]\d)0?([0-3]\d\s*(?:\||}}))", "$1$2-$3-$4");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(200\d)[/_\-\.]?([01]\d)[/_\-\.]?([1-9]\s*(?:\||}}))", "$1$2-$3-0$4");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(200\d)[/_\-\.]?([1-9])[/_\-\.]?([0-3]\d\s*(?:\||}}))", "$1$2-0$3-$4");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(200\d)[/_\-\.]?([1-9])[/_\-\.]0?([1-9]\s*(?:\||}}))", "$1$2-0$3-0$4");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(200\d)[/_\-\.]0?([1-9])[/_\-\.]([1-9]\s*(?:\||}}))", "$1$2-0$3-0$4");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(200\d)[/_\.]?([01]\d)[/_\.]?([0-3]\d\s*(?:\||}}))", "$1$2-$3-$4");
// begin ISO conversion fixes for accessdate, archivedate, may soon be unnecessary due to template changes under discussion
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?([0-3]\d)(?:st|nd|rd|th)? Apr(?:il|\.)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-04-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?([0-3]\d)(?:st|nd|rd|th)? Aug(?:ust|\.)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-08-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?([0-3]\d)(?:st|nd|rd|th)? Dec(?:ember|\.)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-12-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?([0-3]\d)(?:st|nd|rd|th)? Feb(?:r?uary|\.)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-02-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?([0-3]\d)(?:st|nd|rd|th)? Jan(?:uary|\.)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-01-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?([0-3]\d)(?:st|nd|rd|th)? Jul(?:y|\.)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-07-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?([0-3]\d)(?:st|nd|rd|th)? Jun(?:e|\.)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-06-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?([0-3]\d)(?:st|nd|rd|th)? Mar(?:ch|\.)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-03-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?([0-3]\d)(?:st|nd|rd|th)? May(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-05-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?([0-3]\d)(?:st|nd|rd|th)? Nov(?:ember|\.)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-11-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?([0-3]\d)(?:st|nd|rd|th)? Oct(?:ober|\.)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-10-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?([0-3]\d)(?:st|nd|rd|th)? Sep(?:tember|\.)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-09-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?([1-9])(?:st|nd|rd|th)? Apr(?:il|\.)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-04-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?([1-9])(?:st|nd|rd|th)? Aug(?:ust|\.)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-08-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?([1-9])(?:st|nd|rd|th)? Dec(?:ember|\.)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-12-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?([1-9])(?:st|nd|rd|th)? Feb(?:r?uary|\.)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-02-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?([1-9])(?:st|nd|rd|th)? Jan(?:uary|\.)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-01-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?([1-9])(?:st|nd|rd|th)? Jul(?:y|\.)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-07-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?([1-9])(?:st|nd|rd|th)? Jun(?:e|\.)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-06-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?([1-9])(?:st|nd|rd|th)? Mar(?:ch|\.)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-03-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?([1-9])(?:st|nd|rd|th)? May[,\.]?(?:\]\])?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-05-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?([1-9])(?:st|nd|rd|th)? Nov(?:ember|\.)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-11-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?([1-9])(?:st|nd|rd|th)? Oct(?:ober|\.)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-10-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?([1-9])(?:st|nd|rd|th)? Sep(?:tember|\.)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-09-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?Apr(?:il|\.)?\s+([0-3]\d)(?:\]\])?(?:st|nd|rd|th)?[,\.]?(?:\]\])?[,\.]?\s+(?:20)?(0\d)(?:\]\])?", "${1}20$3-04-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?Apr(?:il|\.)?\s+([1-9])(?:st|nd|rd|th)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-04-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?Aug(?:ust|\.)?\s+([0-3]\d)(?:\]\])?(?:st|nd|rd|th)?[,\.]?(?:\]\])?[,\.]?\s+(?:20)?(0\d)(?:\]\])?", "${1}20$3-08-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?Aug(?:ust|\.)?\s+([1-9])(?:st|nd|rd|th)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-08-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?Dec(?:ember|\.)?\s+([0-3]\d)(?:\]\])?(?:st|nd|rd|th)?[,\.]?(?:\]\])?[,\.]?\s+(?:20)?(0\d)(?:\]\])?", "${1}20$3-12-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?Dec(?:ember|\.)?\s+([1-9])(?:st|nd|rd|th)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-12-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?Feb(?:r?uary|\.)?\s+([0-3]\d)(?:\]\])?(?:st|nd|rd|th)?[,\.]?(?:\]\])?[,\.]?\s+(?:20)?(0\d)(?:\]\])?", "${1}20$3-02-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?Feb(?:r?uary|\.)?\s+([1-9])(?:st|nd|rd|th)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-02-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?Jan(?:uary|\.)?\s+([0-3]\d)(?:\]\])?(?:st|nd|rd|th)?[,\.]?(?:\]\])?[,\.]?\s+(?:20)?(0\d)(?:\]\])?", "${1}20$3-01-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?Jan(?:uary|\.)?\s+([1-9])(?:st|nd|rd|th)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-01-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?Jul(?:y|\.)?\s+([0-3]\d)(?:\]\])?(?:st|nd|rd|th)?[,\.]?(?:\]\])?[,\.]?\s+(?:20)?(0\d)(?:\]\])?", "${1}20$3-07-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?Jul(?:y|\.)?\s+([1-9])(?:st|nd|rd|th)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-07-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?Jun(?:e|\.)?\s+([0-3]\d)(?:\]\])?(?:st|nd|rd|th)?[,\.]?(?:\]\])?[,\.]?\s+(?:20)?(0\d)(?:\]\])?", "${1}20$3-06-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?Jun(?:e|\.)?\s+([1-9])(?:st|nd|rd|th)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-06-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?Mar(?:ch|\.)?\s+([0-3]\d)(?:\]\])?(?:st|nd|rd|th)?[,\.]?(?:\]\])?[,\.]?\s+(?:20)?(0\d)(?:\]\])?", "${1}20$3-03-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?Mar(?:ch|\.)?\s+([1-9])(?:st|nd|rd|th)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-03-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?May ([0-3]\d)(?:\]\])?(?:st|nd|rd|th)?[,\.]?(?:\]\])?[,\.]?\s+(?:20)?(0\d)(?:\]\])?", "${1}20$3-05-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?May ([1-9])(?:st|nd|rd|th)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-05-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?Nov(?:ember|\.)?\s+([0-3]\d)(?:\]\])?(?:st|nd|rd|th)?[,\.]?(?:\]\])?[,\.]?\s+(?:20)?(0\d)(?:\]\])?", "${1}20$3-11-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?Nov(?:ember|\.)?\s+([1-9])(?:st|nd|rd|th)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-11-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?Oct(?:ober|\.)?\s+([0-3]\d)(?:\]\])?(?:st|nd|rd|th)?[,\.]?(?:\]\])?[,\.]?\s+(?:20)?(0\d)(?:\]\])?", "${1}20$3-10-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?Oct(?:ober|\.)?\s+([1-9])(?:st|nd|rd|th)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-10-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?Sep(?:tember|\.)?\s+([0-3]\d)(?:\]\])?(?:st|nd|rd|th)?[,\.]?(?:\]\])?[,\.]?\s+(?:20)?(0\d)(?:\]\])?", "${1}20$3-09-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)(?:\[\[)?Sep(?:tember|\.)?\s+([1-9])(?:st|nd|rd|th)?[,\.]?(?:\]\])?[,\.]?\s*(?:20)?(0[4-8])(?:\]\])?", "${1}20$3-09-0$2");
// end ISO conversion fixes
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)([2-3]\d)[/_\-\.]?(1[0-2])[/_\-\.]?(?:20)?(0\d)", "${1}20$4-$3-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)([2-3]\d)[/_\-\.]0?([1-9])[/_\-\.](?:20)?(0\d)", "${1}20$4-0$3-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)0?([1-9])[/_\-\.]?(1[3-9]|[2-3]\d)[/_\-\.]?(?:20)?(0\d)", "${1}20$4-0$2-$3");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)((?:access|archive)date\s*=\s*)0?([1-9])[/_\-\.]?0?\2[/_\-\.]?(?:20)?(0\d)", "${1}20$3-0$2-0$2"); // n-n-2004 and n-n-04 to ISO format (both n the same)
}
// cite date fixes only
if(Regex.IsMatch(ArticleText, @"(?si){{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*="))
{
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\[?\[?)(200\d|19[7-9]\d)[/_]?([0-1]\d)[/_]?([0-3]\d\s*(?:\||}}))", "$1$2-$3-$4");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\[?\[?)(1[0-2])[/_\-\.]?([2-3]\d)[/_\-\.]?(19[7-9]\d)\b", "$1$4-$2-$3");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\[?\[?)0?([1-9])[/_\-\.]?([2-3]\d)[/_\-\.]?(19[7-9]\d)\b", "$1$4-0$2-$3");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\[?\[?)([2-3]\d)[/_\-\.]?0?([1-9])[/_\-\.]?(19[7-9]\d)\b", "$1$4-0$3-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\[?\[?)([2-3]\d)[/_\-\.]?(1[0-2])[/_\-\.]?(19[7-9]\d)\b", "$1$4-$3-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\[?\[?)(1[0-2])[/_\-\.]([2-3]\d)[/_\-\.](?:20)?(0\d)\b", "${1}20$4-$2-$3");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\[?\[?)0?([1-9])[/_\-\.]([2-3]\d)[/_\-\.](?:20)?(0\d)\b", "${1}20$4-0$2-$3");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\[?\[?)([2-3]\d)[/_\-\.]0?([1-9])[/_\-\.](?:20)?(0\d)\b", "${1}20$4-0$3-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\[?\[?)([2-3]\d)[/_\-\.]?(1[0-2])[/_\-\.]?(?:20)?(0\d)\b", "${1}20$4-$3-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\[?\[?)(1[0-2])[/_\-\.]?(1[3-9])[/_\-\.]?(19[7-9]\d)\b", "$1$4-$2-$3");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\[?\[?)0?([1-9])[/_\-\.](1[3-9])[/_\-\.](19[7-9]\d)\b", "$1$4-0$2-$3");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\[?\[?)(1[3-9])[/_\-\.]?0?([1-9])[/_\-\.]?(19[7-9]\d)\b", "$1$4-0$3-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\[?\[?)(1[3-9])[/_\-\.]?(1[0-2])[/_\-\.]?(19[7-9]\d)\b", "$1$4-$3-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\[?\[?)(1[0-2])[/_\-\.]?(1[3-9])[/_\-\.]?(?:20)?(0\d)\b", "${1}20$4-$2-$3");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\[?\[?)([1-9])[/_\-\.](1[3-9])[/_\-\.](?:20)?(0\d)\b", "${1}20$4-0$2-$3");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\[?\[?)(1[3-9])[/_\-\.]?([1-9])[/_\-\.](?:20)?(0\d)\b", "${1}20$4-0$3-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*\[?\[?)(1[3-9])[/_\-\.](1[0-2])[/_\-\.](?:20)?(0\d)\b", "${1}20$4-$3-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*)0?([1-9])[/_\-\.]0?\2[/_\-\.](200\d|19[7-9]\d)\b", "$1$3-0$2-0$2"); // n-n-2004 and n-n-1980 to ISO format (both n the same)
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*)0?([1-9])[/_\-\.]0?\2[/_\-\.](0\d)\b", "${1}20$3-0$2-0$2"); // n-n-04 to ISO format (both n the same)
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*)(1[0-2])[/_\-\.]?\2[/_\-\.]?(200\d|19[7-9]\d)\b", "$1$3-$2-$2"); // nn-nn-2004 and nn-nn-1980 to ISO format (both nn the same)
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*)(1[0-2])[/_\-\.]?\2[/_\-\.]?(0\d)\b", "${1}20$3-$2-$2"); // nn-nn-04 to ISO format (both nn the same)
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*)((?:\[\[)?200\d|19[7-9]\d)[/_\-\.]([1-9])[/_\-\.]0?([1-9](?:\]\])?\s*(?:\||}}))", "$1$2-0$3-0$4");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*)((?:\[\[)?200\d|19[7-9]\d)[/_\-\.]0?([1-9])[/_\-\.]([1-9](?:\]\])?\s*(?:\||}}))", "$1$2-0$3-0$4");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*)((?:\[\[)?200\d|19[7-9]\d)[/_\-\.]?([0-1]\d)[/_\-\.]?([1-9](?:\]\])?\s*(?:\||}}))", "$1$2-$3-0$4");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*)((?:\[\[)?200\d|19[7-9]\d)[/_\-\.]?([1-9])[/_\-\.]?([0-3]\d(?:\]\])?\s*(?:\||}}))", "$1$2-0$3-$4");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*)((?:\[\[)?200\d|19[7-9]\d)([0-1]\d)[/_\-\.]([0-3]\d(?:\]\])?\s*(?:\||}}))", "$1$2-$3-$4");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air)?date2?\s*=\s*)((?:\[\[)?200\d|19[7-9]\d)[/_\-\.]([0-1]\d)0?([0-3]\d(?:\]\])?\s*(?:\||}}))", "$1$2-$3-$4");
}
VisibleFixes += LoopedRegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?\s*=\s*(?:\[\[)?200\d)-([2-3]\d|1[3-9])-(0[1-9]|1[0-2])(\]\])?", "$1-$3-$2$4"); // YYYY-DD-MM to YYYY-MM-DD
VisibleFixes += LoopedRegexReplace(ref ArticleText, @"(?si)(\{\{\s*cite[^\{\}]*\|\s*(?:archive|air|access)?date2?\s*=\s*(?:(?:200\d|19[7-9]\d)-[01]?\d-[0-3]?\d|[0-3]?\d\s*\w+,?\s*(?:200\d|19[7-9]\d)|\w+\s*[0-3]?\d,?\s*(?:200\d|19[7-9]\d)))(\s*[,-:]?\s+[0-2]?\d\:?[0-5]\d(?:\:?[0-5]\d)?\s*[^\|\}]*)", "$1<!--$2-->"); // Removes time from date fields
// format ambiguous cite dates
ArticleTextBeforeGuessedDate = ArticleText;
if(APPLY_AMBIGUOUS_FIXES)
{
// date is ambiguous between American (month-first) and international (day-first) order: use American order when the article locale found is "US", otherwise use international order
if(ArticleLocaleFound.Equals("US"))
{
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?\s*=\s*)(1[0-2])[/_\-\.]0?([1-9])[/_\-\.](?:20)?(0\d)\b", "${1}20$4-$2-0$3");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?\s*=\s*)(1[0-2])[/_\-\.](1[0-2])[/_\-\.](?:20)?(0\d)\b", "${1}20$4-$2-$3");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?\s*=\s*)0?([1-9])[/_\-\.]0?([1-9])[/_\-\.](?:20)?(0\d)\b", "${1}20$4-0$2-0$3");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?\s*=\s*)0?([1-9])[/_\-\.](1[0-2])[/_\-\.](?:20)?(0\d)\b", "${1}20$4-0$2-$3");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?\s*=\s*)(1[0-2])[/_\-\.]?0?([1-9])[/_\-\.]?(200\d|19[6-9]\d)\b", "$1$4-$2-0$3");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?\s*=\s*)(1[0-2])[/_\-\.]?(1[0-2])[/_\-\.]?(200\d|19[6-9]\d)\b", "$1$4-$2-$3");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?\s*=\s*)0?([1-9])[/_\-\.]?0?([1-9])[/_\-\.]?(200\d|19[6-9]\d)\b", "$1$4-0$2-0$3");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?\s*=\s*)0?([1-9])[/_\-\.]?(1[0-2])[/_\-\.]?(200\d|19[6-9]\d)\b", "$1$4-0$2-$3");
}
else // Intl format
{
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?\s*=\s*)(1[0-2])[/_\-\.]0?([1-9])[/_\-\.](?:20)?(0\d)\b", "${1}20$4-0$3-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?\s*=\s*)(1[0-2])[/_\-\.](1[0-2])[/_\-\.](?:20)?(0\d)\b", "${1}20$4-$3-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?\s*=\s*)0?([1-9])[/_\-\.]0?([1-9])[/_\-\.](?:20)?(0\d)\b", "${1}20$4-0$3-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?\s*=\s*)0?([1-9])[/_\-\.](1[0-2])[/_\-\.](?:20)?(0\d)\b", "${1}20$4-$3-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?\s*=\s*)(1[0-2])[/_\-\.]?0?([1-9])[/_\-\.]?(200\d|19[6-9]\d)\b", "$1$4-0$3-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?\s*=\s*)(1[0-2])[/_\-\.]?(1[0-2])[/_\-\.]?(200\d|19[6-9]\d)\b", "$1$4-$3-$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?\s*=\s*)0?([1-9])[/_\-\.]?0?([1-9])[/_\-\.]?(200\d|19[6-9]\d)\b", "$1$4-0$3-0$2");
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?\s*=\s*)0?([1-9])[/_\-\.]?(1[0-2])[/_\-\.]?(200\d|19[6-9]\d)\b", "$1$4-$3-0$2");
}
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cit[^{}]*\|\s*(?:archive|air|access)?date2?\s*=\s*(?:200\d|19[7-9]\d)-[0-1]\d-[0-3]\d\s+)([0-2]?\d\:?[0-5]\d(?:\:?[0-5]\d)?\s*[^[\|}]*)", "$1<!--$2-->"); // Removes time from ISO date fields
}
SpecificFixes = VisibleFixes-VisibleFixesCheckpoint;
if (!ArticleTextBeforeCiteTemplate.Equals(ArticleText) && SpecificFixes > 0)
Summary += String.Format("format cite template dates ({0}), ", SpecificFixes);
if(!ArticleTextBeforeGuessedDate.Equals(ArticleText))
Summary += "format ambiguous cite dates (set " + ArticleLocaleFound + " format), ";
if(DayMonthYearFixApplied)
Summary += "combine day, month & year, ";
// date of birth / death fixes
ArticleTextBeforeDOBDOD = ArticleText;
VisibleFixesCheckpoint = VisibleFixes;
VisibleFixes += RegexReplace(ref ArticleText, @"(?mi)('''[^'\n]+'''.+?[\(;,]\s*)d\.(\s+\[*(?:(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+[0-3]?\d|[0-3]?\d\s*(?:January|February|March|April|May|June|July|August|September|October|November|December))?\]*,?\s*\[*[1-2]?\d{3}\]*)\s*\)", "$1died$2)"); // date of death
VisibleFixes += RegexReplace(ref ArticleText, @"(?mi)^('''[^'\n]+'''.+?[\(;,]\s*)(?:born|b\.)\s*(\[*(?:(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+[0-3]?\d|[0-3]?\d\s*(?:January|February|March|April|May|June|July|August|September|October|November|December))?\]*,?\s*\[*[1-2]?\d{3}\]*.*?)\s*(?:[,;:\-–]|&.dash;)\s*(?:died|d\.)\s*(\[*(?:(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+[0-3]?\d|[0-3]?\d\s*(?:January|February|March|April|May|June|July|August|September|October|November|December))?\]*,?\s*\[*[1-2]?\d{3}\]*.*?)\s*\)", "$1$2 – $3)"); // birth and death, (regex shares date clause logic with birth, death fixes)
VisibleFixes += RegexReplace(ref ArticleText, @"(?mi)(^'''[^'\n]+'''.+?[\(;,]\s*)d\.(\s*\[*(?:(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+[0-3]?\d|[0-3]?\d\s*(?:January|February|March|April|May|June|July|August|September|October|November|December))?\]*,?\s*\[*[1-2]?\d{3}\]*)\s*\)", "$1died $2)"); // date of death
VisibleFixes += RegexReplace(ref ArticleText, @"(?mi)^('''[^'\n]+'''.+?[\(;,]\s*)b\.(\s*\[*(?:(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+[0-3]?\d|[0-3]?\d\s*(?:January|February|March|April|May|June|July|August|September|October|November|December))?\]*,?\s*\[*[1-2]?\d{3}\]*)\s*\)", "$1born $2)"); // date of birth (regex find is 1 character different to date of death fix)
if(!ArticleTextBeforeDOBDOD.Equals(ArticleText))
{
SpecificFixes = VisibleFixes - VisibleFixesCheckpoint;
Summary += String.Format("format date of birth/death ({0}), ", SpecificFixes);
}
// URL format fixes, may be caught by AWB gen fixes before reaching here
ArticleTextBeforeURL = ArticleText;
VisibleFixesCheckpoint = VisibleFixes;
VisibleFixes += RegexReplace(ref ArticleText, @"(?i)([\s\[>=]ht|f)tp//(\w+)", "$1tp://$2"); // Fixes missing colon in http:// or ftp:// external link
VisibleFixes += RegexReplace(ref ArticleText, @"(?i)([\s\[>=])htp://(\w+)", "$1http://$2"); // 'htp' instead of 'http' in an external link
if(!Regex.IsMatch(ArticleText,@"HTTP/\d\."))
VisibleFixes += RegexReplace(ref ArticleText, @"(?i)([\s\[>=]ht|f)tp:(?:/|///)(\w+)", "$1tp://$2"); // Fixes single or triple slash in ftp or http external link
VisibleFixes += RegexReplace(ref ArticleText, @"(?i)([\s\[>=])((?:ht|f)tp:?/+)(\2)+", "$1$2"); // Fixes multiple http:// or ftp:// in an external link
VisibleFixes += RegexReplace(ref ArticleText, @"(?i)(\[(?:http://|www\.)[^\[\]<>""\s]*?)\|''", "$1 ''"); // fixes [www.site.com|''my cool site''] which links with the bar at the end of the URL
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)({{\s*cite\s+web[^{}]*\|\s*url\s*=\s*)(www\.)", "$1http://$2"); // for cite web the URL requires http:// at start
VisibleFixes += RegexReplace(ref ArticleText, @"(?i)(http://ww)\.", "$1w."); // fixes http://ww.
// ref format fixes, TODO: scan & test
ArticleTextBeforeRef = ArticleText;
VisibleFixesCheckpoint = VisibleFixes;
// whitespace cleaning
VisibleFixes += LoopedRegexReplace(ref ArticleText, @"(?s)<\s*(?:\s+ref\s*|\s*ref\s+)>", "<ref>", false);
VisibleFixes += LoopedRegexReplace(ref ArticleText, @"(?s)<(?:\s*/(?:\s+ref\s*|\s*ref\s+)|\s+/\s*ref\s*)>", "</ref>", false);
// <ref name="Fred" /ref> --> <ref name="Fred"/>
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(<\s*ref\s+name\s*=\s*""[^<>={}""]+?"")\s*/\s*ref\s*>", "$1/>");
// <ref name="Fred".> --> <ref name="Fred"/>
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(<\s*ref\s+name\s*=\s*""[^<>={}""]+?"")[^/\s]>", "$1/>");
// <ref name=foo bar> --> <ref name="foo bar">
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(<\s*ref\s+name\s*=\s*)([^<>={}""]+?)(\s*/?>)", @"$1""$2""$3", false);
// <ref name=foo bar"> --> <ref name="foo bar">
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(<\s*ref\s+name\s*=\s*)([^<>={}""]+?)""(\s*/?>)", @"$1""$2""$3");
// <ref name="foo bar> --> <ref name="foo bar">
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(<\s*ref\s+name\s*=\s*)""([^<>={}""]+?)(\s*/?>)", @"$1""$2""$3");
// <ref name "foo bar"> --> <ref name="foo bar">
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(<\s*ref\s+name\s*)[\+\-]?(\s*""[^<>={}""]+?""\s*/?>)", @"$1=$2");
// <ref "foo bar"> --> <ref name="foo bar">
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(<\s*ref\s+)(""[^<>={}""]+?""\s*/?>)", "$1name=$2");
// ref name typos
VisibleFixes += RegexReplace(ref ArticleText, @"(?i)(<\s*ref\s+n)(me\s*=)", "$1a$2");
// <ref>...<ref/> --> <ref>...</ref>
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(<\s*ref(?:\s+name\s*=.*?)?\s*>[^<>{}""]+?)<\s*ref\s*/\s*>", "$1</ref>");
// <ref>...</red> --> <ref>...</ref>
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(<\s*ref(?:\s+name\s*=[^<>{}]*?)?\s*>[^<>{}""]+?)<\s*/\s*red\s*>", "$1</ref>");
// <ref name="Fred">Fred</ref> --> <ref name="Fred"/>
VisibleFixes += RegexReplace(ref ArticleText, @"(?s)(<\s*ref\s+name\s*=\s*""([^<>={}""]+?)""\s*)>\2</ref>", "$1/>");
// <ref>...<ref><ref> --> <ref>...</ref><ref>
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(<\s*ref(?:\s+name\s*=[^<>{}]*?)?\s*>[^<>{}""]+?)<ref>(\s*<ref>)", "$1</ref>$2");
// <ref name="Fred"><ref name="Bert"> --> <ref name="Fred"/><ref name="Bert">
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(<\s*ref\s+name\s*=\s*""[^<>={}""]+?""\s*)>(<\s*ref\s+name\s*=\s*""[^<>={}""]+?""\s*>)", "$1/>$2");
// <ref name="Fred">...<ref> --> <ref name="Fred">...</ref>
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(<\s*ref\s+name\s*=\s*""[^<>={}""]+?""\s*>[^<>{}""]+?)<ref>", "$1</ref>");
// <ref>[URL words]<ref> --> <ref>[URL words]</ref>
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(<\s*ref[^<>{}\/]*?>\s*\[+\s*http://[^\[\]<>""\s]+[^<>{}\]\/]+?\]+\s*)<\s*(ref\s*>)", "$1</$2");
if(!ArticleTextBeforeRef.Equals(ArticleText))
{
SpecificFixes = VisibleFixes - VisibleFixesCheckpoint;
Summary += String.Format("fix ref format ({0}), ", SpecificFixes);
}
// end ref format fixes
// convert <ref>[[http:// to <ref>[http://
VisibleFixes += RegexReplace(ref ArticleText, @"(?i)(<ref.*?>\[)\[+(http://)", "$1$2");
// complete square brackets around external links in ref tags
VisibleFixes += RegexReplace(ref ArticleText, @"(?i)(<ref.*?>[^\[\]<>]*?\[\s*http://[^\[\]<>]*?)(?:(\w)})?(</ref>)", "$1$2]$3");
VisibleFixes += RegexReplace(ref ArticleText, @"(?i)(<ref.*?>)(\s*http://[^\[\]<>]*?\][^\[\]<>]*?</ref>)", "$1[$2");
// more bracket fixes on external links
BracketOnThread bot = new BracketOnThread(ArticleText);
System.Threading.Thread a = new System.Threading.Thread(new System.Threading.ThreadStart(bot.Do));
a.Start();
System.Threading.Thread.Sleep(BRACKET_ON_THREAD_TIMEOUT/2);
if(a.IsAlive)
System.Threading.Thread.Sleep(BRACKET_ON_THREAD_TIMEOUT/2);
if(a.IsAlive)
{
LogToFile("[[" + ArticleTitle + "]]" + "@@@bracket fix timeout");
a.Abort();
}
ArticleText = bot.getArticleText();
VisibleFixes += bot.getVisibleFixes();
if(!ArticleTextBeforeURL.Equals(ArticleText))
{
SpecificFixes = VisibleFixes - VisibleFixesCheckpoint;
Summary += String.Format("fix web link format ({0}), ", SpecificFixes);
}
VisibleFixes += RegexReplace(ref ArticleText, @"(?mi)^\s*''for\s(.+?),\s+(?:please\s)?see\s+\[\[(.+?)\]\]\n", "{{for|$1|$2}}\n");
//VisibleFixes += RegexReplace(ref ArticleText, @"(?!\[\[September 11,? 2001\]\])\[\[\s*(January|February|March|April|May|June|July|August|September|October|November|December)\s*0?([1-3]?\d)(,?)\s*(200\d|19[7-9]\d)\s*\]\]", "[[$1 $2]]$3 [[$4]]"); // [[Month DD, YYYY]] fix
//VisibleFixes += RegexReplace(ref ArticleText, @"\[\[\s*0?([1-3]?\d)\s*(January|February|March|April|May|June|July|August|September|October|November|December),?\s*(200\d|19[7-9]\d)\s*\]\]", "[[$1 $2]] [[$3]]"); // [[DD Month YYYY]] fix
if(Regex.IsMatch(ArticleText, @"(?si){{\s*cit[^{}]*\|\s*url\s*=\s*[^\|}{]+?\.PDF"))
VisibleFixes += AddPDFFormatField(ref ArticleText);
ArticleTextBeforeDuplicateCiteField = ArticleText;
// remove year and month entries if they match a date entry
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|\s*date\s*=\s*[^{}\|=]*?\b(\d{4})\b(?:\s*\|?[^{}]*?))\|\s*\byear\s*=\s*\2\s*(\||\}\})", "$1$3", false); // date then year
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*)\|\s*year\s*=\s*(\d{4})\s*((?:\|[^{}]*?)?\|?\s*\bdate\s*=\s*[^{}\|=]*?\b\2\s*([^{}\|=]*?\||\}\}))", "$1$3", false); // year then date
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|\s*date\s*=\s*[^{}\|=]*?\b(January|February|March|April|May|June|July|August|September|October|November|December)\b(?:\s*\|?[^{}]*?))\|\s*\bmonth\s*=\s*\2\s*(\||\}\})", "$1$3", false); // date then month
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*)\|\s*month\s*=\s*(January|February|March|April|May|June|July|August|September|October|November|December)\s*((?:\|[^{}]*?)?\|?\s*\bdate\s*=\s*[^{}\|=]*?\b\2\s*([^{}\|=]*?(\||\}\})))", "$1$3", false); // month then date
VisibleFixes += LoopedRegexReplace(ref ArticleText, @"(?s)({{\s*[Cc]it[^{}]*\|\s*)(\w+)\s*=\s*([^\|}{]+?)\s*\|((?:[^{}]*?\|)?\s*)\2(\s*=\s*)\3(\s*(\||\}\}))", "$1$4$2$5$3$6", false); // duplicate field remover for cite templates
VisibleFixes += LoopedRegexReplace(ref ArticleText, @"(?s)(\{\{\s*cit[^{}]*\|\s*)(\w+)(\s*=\s*[^\|}{\s][^\|}{]+?\s*\|?(?:[^{}]*?)?)\|\s*\2\s*=\s*(\||\}\})", "$1$2$3$4", false); // 'field=populated | field=null' drop field=null
VisibleFixes += LoopedRegexReplace(ref ArticleText, @"(?s)(\{\{\s*cit[^{}]*\|\s*)(\w+)\s*=\s*\|\s*((?:[^{}]+?\|)?\s*\2\s*=\s*[^\|}{\s])", "$1$3", false); // 'field=null | field=populated' drop field=null
if(!ArticleTextBeforeDuplicateCiteField.Equals(ArticleText))
{
SpecificFixes = VisibleFixes - VisibleFixesCheckpoint;
Summary += "rm dupe cite field(s), ";
}
// DEFAULTSORT fixes, may be caught by AWB gen fixes before reaching here
ArticleTextBeforeDEFAULTSORT = ArticleText;
VisibleFixes += RegexReplace(ref ArticleText, @"(?s)({{DEFAULTSORT:(.*?)}})\s+(.*?){{DEFAULTSORT:\2}}\s+", "$1\n$3", false); // double defaultsort remover
VisibleFixes += LoopedRegexReplace(ref ArticleText, @"(?s)(\n\[\[Category:)([^\]]+\]\])(.*?)\1\2", "$1$2$3", false); // duplicate category remover
VisibleFixes += LoopedRegexReplace(ref ArticleText, @"(?s){{DEFAULTSORT\:([^{}]+)}}(.*?\[\[Category\:[^{}]+)\|\1\]\]", "{{DEFAULTSORT:$1}}$2]]", false); // defautsort with category cleaner
VisibleFixes += RegexReplace(ref ArticleText, @"({{DEFAULTSORT.*?}}\n)(.*?)(\[\[Category:)", "$2$1$3", false); // moves defaultsort to be directly above category
//VisibleFixes += RegexReplace(ref ArticleText, @"(?si){{DEFAULTSORT\:([^{}]+)}}(.*?\[\[Category\:(?:\d{4} (?:births|deaths)|Living people))\|\1\]\]", "{{DEFAULTSORT:$1}}$2]]"); // births deaths category cleaner
//VisibleFixes += RegexReplace(ref ArticleText, @"(?si){{DEFAULTSORT\:([^{}]+)}}(.*?)\[\[Category:Living people\]\]\n(.*?)\[\[Category:Year of birth missing (living people)\]\](?:\n)?", "{{Lifetime|||$1}}$2$3"); // living birth missing lifetime
//VisibleFixes += RegexReplace(ref ArticleText, @"(\[\[(?:(?:January|February|March|April|May|June|July|August|September|October|November|December)\s*[1-3]?\d|[1-3]?\d\s+(?:January|February|March|April|May|June|July|August|September|October|November|December))\]\],?)\s+(\d{4})\b", "$1 [[$2]]"); // fully wikilink part wikilinked dates
//VisibleFixes += RegexReplace(ref ArticleText, @"\b((?:(?:January|February|March|April|May|June|July|August|September|October|November|December)\s*[1-3]?\d|[1-3]?\d\s+(?:January|February|March|April|May|June|July|August|September|October|November|December)))(,?)\s+(\[\[\d{4}\]\])", "[[$1]]$2 $3"); // fully wikilink part wikilinked dates
if(!ArticleTextBeforeDEFAULTSORT.Equals(ArticleText))
Summary += "tidy up DEFAULTSORT/categories, ";
// of date between month & year
ArticleTextBeforeOfDate = ArticleText;
VisibleFixesCheckpoint = VisibleFixes;
VisibleFixes += LoopedRegexReplace(ref ArticleText, @"(?m)(^[^""{]+?(?:January|February|March|April|May|June|July|August|September|October|November|December))\s+of\s+(200\d|1[89]\d\d)\b(?<!^[^""{]+?\b[Tt]he\s{1,5}\w{3,15}\s{1,5}of\s{1,5}(200\d|1[89]\d\d))", "$1 $2"); // don't match 'in the June of 2007...'
// Only mention the fix in the edit summary when removing 'of' actually changed the text.
if (!ArticleTextBeforeOfDate.Equals(ArticleText))
{
// Growth of VisibleFixes since the checkpoint taken just before this fix ran.
SpecificFixes = VisibleFixes - VisibleFixesCheckpoint;
// The count goes before the ", " separator (same shape as the other summary fragments)
// so the next fragment appended to Summary does not run straight into the closing bracket.
Summary += String.Format("rm 'of' btwn month & year ({0}), ", SpecificFixes);
}
// langfixes
// old italics inversion: && !Regex.IsMatch(ArticleText, @"(Abkhazian|Afar|Afrikaans|Akan|Albanian|Amharic|Arabic|Aragonese|Armenian|Assamese|Avaric|Avestan|Aymara|Azerbaijani|Bambara|Bashkir|Basque|Belarusian|Bengali|Bihari|Bislama|Bosnian|Breton|Bulgarian|Burmese|Catalan|Chamorro|Chechen|Chichewa|Chinese|Church Slavic|Chuvash|Cornish|Corsican|Cree|Croatian|Czech|Danish|Divehi|Dutch|Dzongkha|English|Esperanto|Estonian|Ewe|Faroese|Fijian|Finnish|French|Fulah|Galician|Ganda|Georgian|German|Greek|Guaran|Gujarati|Haitian|Hausa|Hebrew|Herero|Hindi|Hiri Motu|Hungarian|Icelandic|Ido|Igbo|Indonesian|Interlingue|Inuktitut|Inupiaq|Irish|Italian|Japanese|Javanese|Kalaallisut|Kannada|Kanuri|Kashmiri|Kazakh|Khmer|Kikuyu|Kinyarwanda|Kirghiz|Kirundi|Komi|Kongo|Korean|Kurdish|Kwanyama|Lao|Latin|Latvian|Limburgish|Lingala|Lithuanian|Luxembourgish|Macedonian|Malagasy|Malay|Malayalam|Maltese|Manx|Marathi|Marshallese|Moldavian|Mongolian|Nauru|Navajo|Ndonga|Nepali|North Ndebele|Northern Sami|Norwegian|Norwegian Bokml|Norwegian Nynorsk|Occitan|Ojibwa|Oriya|Oromo|Ossetian|P[au]njabi|Pashto|Persian|Polish|Portuguese|Quechua|Raeto-Romance|Romanian|Russian|Samoan|Sango|Sanskrit|Sardinian|Scottish Gaelic|Serbian|Serbo-Croatian|Shona|Sichuan Yi|Sindhi|Sinhala|Slovak|Slovenian|Somali|South Ndebele|Southern Sotho|Spanish|Sundanese|Swahili|Swati|Swedish|Tagalog|Tahitian|Tajik|Tamil|Tatar|Telugu|Thai|Tibetan|Tigrinya|Tonga|Tsonga|Tswana|Turkish|Turkmen|Twi|Uighur|Ukrainian|Urdu|Uzbek|Venda|Vietnamese|Volapk|Walloon|Welsh|Western Frisian|Wolof|Xhosa|Yiddish|Yoruba|Zhuang|Zulu)\]\]\:\s*''[^'{}]+?''(?!\s*,?\s*(?:abbreviated|singular|plural|from|transl|alternative|\bor\b|(?:sometimes\s+)?also|meaning|literally|died|born|[A-Z]+\b|\(?\d{3,4}|['{}\;\)]))")
if(!REPORT_MODE)
{
int NonCountedFixes = 0; // language tag fixes involve multiple steps that should count as just one
ArticleTextBeforeLanguageTemplate = ArticleText;
NonCountedFixes += RegexReplace(ref ArticleText, @"(motto\s*=\s*)(''')?('')?(?:{{lang\|la\|)?(.*?)(?:}})?\3?\4?(\s*\<\/?br\s*\/?\>\s*(?:\<small\>)?\s*\(?\s*)\[\[\s*Latin\s*\]\]\:\s*(.*?)(\s*\|)", "$1{{lang-la|$2$4$2}}$5$6$7"); //Motto fix
NonCountedFixes += LoopedRegexReplace(ref ArticleText, @"(\(|;\s*)\[\[(Abkhazian|Afar|Afrikaans|Akan|Albanian|Amharic|Arabic|Aragonese|Armenian|Assamese|Avaric|Avestan|Aymara|Azerbaijani|Bambara|Bashkir|Basque|Belarusian|Bengali|Bihari|Bislama|Bosnian|Breton|Bulgarian|Burmese|Catalan|Chamorro|Chechen|Chichewa|Chinese|Church Slavic|Chuvash|Cornish|Corsican|Cree|Croatian|Czech|Danish|Divehi|Dutch|Dzongkha|English|Esperanto|Estonian|Ewe|Faroese|Fijian|Finnish|French|Fulah|Galician|Ganda|Georgian|German|Greek|Guaran|Gujarati|Haitian|Hausa|Hebrew|Herero|Hindi|Hiri Motu|Hungarian|Icelandic|Ido|Igbo|Indonesian|Interlingue|Inuktitut|Inupiaq|Irish|Italian|Japanese|Javanese|Kalaallisut|Kannada|Kanuri|Kashmiri|Kazakh|Khmer|Kikuyu|Kinyarwanda|Kirghiz|Kirundi|Komi|Kongo|Korean|Kurdish|Kwanyama|Lao|Latin|Latvian|Limburgish|Lingala|Lithuanian|Luxembourgish|Macedonian|Malagasy|Malay|Malayalam|Maltese|Manx|Marathi|Marshallese|Moldavian|Mongolian|Nauru|Navajo|Ndonga|Nepali|North Ndebele|Northern Sami|Norwegian|Norwegian Bokml|Norwegian Nynorsk|Occitan|Ojibwa|Oriya|Oromo|Ossetian|P[au]njabi|Pashto|Persian|Polish|Portuguese|Quechua|Raeto-Romance|Romanian|Russian|Samoan|Sango|Sanskrit|Sardinian|Scottish Gaelic|Serbian|Serbo-Croatian|Shona|Sichuan Yi|Sindhi|Sinhala|Slovak|Slovenian|Somali|South Ndebele|Southern Sotho|Spanish|Sundanese|Swahili|Swati|Swedish|Tagalog|Tahitian|Tajik|Tamil|Tatar|Telugu|Thai|Tibetan|Tigrinya|Tonga|Tsonga|Tswana|Turkish|Turkmen|Twi|Uighur|Ukrainian|Urdu|Uzbek|Venda|Vietnamese|Volapk|Walloon|Welsh|Western Frisian|Wolof|Xhosa|Yiddish|Yoruba|Zhuang|Zulu)\]\]\s*:+(\s*(?<ap>''|"")?(''')?([^'{}\)\]\[;/""]+?)\k<ap>?(\2)?\s*(\)|;|,\s*(?:abbreviated|singular|plural|from|transl|alternative|\bor\b|(?:sometimes\s+)?also|meaning|literally|died|born|\(?\d{3,4}))(\2)?(?:\k<ap>([^']))?)", "$1[[$2 language|$2]]:$3"); // converts wikilinked language name to [[blah language|blah]] to be templated by later fixes
// General language tag fix: turns "([[Foo language|Foo]]: text)" into the "{{lang-Foo@@|text}}" placeholder,
// which the per-language replacements further down rewrite to the real {{lang-xx|...}} template.
// The pattern must stay on ONE physical line: it is a verbatim (@"...") string, so a line break inside it
// becomes part of the regex and the pattern would then only match text with a newline before "language".
NonCountedFixes += LoopedRegexReplace(ref ArticleText, @"(?i)(\(|;\s*)\s*\[\[\s*(Abkhazian|Afar|Afrikaans|Akan|Albanian|Amharic|Arabic|Aragonese|Armenian|Assamese|Avaric|Avestan|Aymara|Azerbaijani|Bambara|Bashkir|Basque|Belarusian|Bengali|Bihari|Bislama|Bosnian|Breton|Bulgarian|Burmese|Catalan|Chamorro|Chechen|Chichewa|Chinese|Church Slavic|Chuvash|Cornish|Corsican|Cree|Croatian|Czech|Danish|Divehi|Dutch|Dzongkha|English|Esperanto|Estonian|Ewe|Faroese|Fijian|Finnish|French|Fulah|Galician|Ganda|Georgian|German|Greek|Guaran|Gujarati|Haitian|Hausa|Hebrew|Herero|Hindi|Hiri Motu|Hungarian|Icelandic|Ido|Igbo|Indonesian|Interlingue|Inuktitut|Inupiaq|Irish|Italian|Japanese|Javanese|Kalaallisut|Kannada|Kanuri|Kashmiri|Kazakh|Khmer|Kikuyu|Kinyarwanda|Kirghiz|Kirundi|Komi|Kongo|Korean|Kurdish|Kwanyama|Lao|Latin|Latvian|Limburgish|Lingala|Lithuanian|Luxembourgish|Macedonian|Malagasy|Malay|Malayalam|Maltese|Manx|Marathi|Marshallese|Moldavian|Mongolian|Nauru|Navajo|Ndonga|Nepali|North Ndebele|Northern Sami|Norwegian|Norwegian Bokml|Norwegian Nynorsk|Occitan|Ojibwa|Oriya|Oromo|Ossetian|Panjabi|Pashto|Persian|Polish|Portuguese|Quechua|Raeto-Romance|Romanian|Russian|Samoan|Sango|Sanskrit|Sardinian|Scottish Gaelic|Serbian|Serbo-Croatian|Shona|Sichuan Yi|Sindhi|Sinhala|Slovak|Slovenian|Somali|South Ndebele|Southern Sotho|Spanish|Sundanese|Swahili|Swati|Swedish|Tagalog|Tahitian|Tajik|Tamil|Tatar|Telugu|Thai|Tibetan|Tigrinya|Tonga|Tsonga|Tswana|Turkish|Turkmen|Twi|Uighur|Ukrainian|Urdu|Uzbek|Venda|Vietnamese|Volapk|Walloon|Welsh|Western Frisian|Wolof|Xhosa|Yiddish|Yoruba|Zhuang|Zulu) language\s*\|\s*\2\s*\]\]\s*:+\s*((?:''')?)((?:''|"")?)([^{}\)\]\[;/""<>']*?(?:(?:\(|''')[^{}\)\]\[;/""<>']+(?:\)|''')[^{}\)\]\[;/""<>']*)*)(?:(\3)\4\s*((?:(?<=[^']'''?(?:'')?)\s*)|\)|;|(?:,|(?<=[^']'''?(?:'')?))\s*\[*(?:abbreviated|singular|plural|from|transl|alternative|\bor\b|(?:sometimes\s+)?also|meaning|literally|died|born|[A-Z]+\b|\(?\d{3,4}))|\s*(\)|;|,\s*\[*(?:abbreviated|singular|plural|from|transl|alternative|\bor\b|(?:sometimes\s+)?also|meaning|literally|died|born|January|February|March|April|May|June|July|August|September|October|November|December|[A-Z]+\b|\(?\d{3,4}))(\3)\4)", "$1{{lang-$2@@|$3$5$6$9}}$7$8"); //Language tag fix general
// before running this find & replaces, check that one or more of the above three fixes changed something
if (!ArticleTextBeforeLanguageTemplate.Equals(ArticleText))
{
VisibleFixesCheckpoint = VisibleFixes;
// Resolve the "{{lang-<Language name>@@|" placeholders left by the general language tag fix into the
// real "{{lang-xx|" template names. Each row is { language name as it appears in the placeholder,
// two-letter code used in the template name }. Rows are applied in the listed order, one
// case-insensitive RegexReplace per row, and every replacement made is added to VisibleFixes.
string[,] LangPlaceholderCodes =
{
{"Abkhazian", "ab"}, {"Afar", "aa"}, {"Afrikaans", "af"}, {"Akan", "ak"},
{"Albanian", "sq"}, {"Amharic", "am"}, {"Arabic", "ar"}, {"Aragonese", "an"},
{"Armenian", "hy"}, {"Assamese", "as"}, {"Avaric", "av"}, {"Avestan", "ae"},
{"Aymara", "ay"}, {"Azerbaijani", "az"}, {"Bambara", "bm"}, {"Bashkir", "ba"},
{"Basque", "eu"}, {"Belarusian", "be"}, {"Bengali", "bn"}, {"Bihari", "bh"},
{"Bislama", "bi"}, {"Bosnian", "bs"}, {"Breton", "br"}, {"Bulgarian", "bg"},
{"Burmese", "my"}, {"Catalan", "ca"}, {"Chamorro", "ch"}, {"Chechen", "ce"},
{"Chichewa", "ny"}, {"Chinese", "zh"}, {"Church Slavic", "cu"}, {"Chuvash", "cv"},
{"Cornish", "kw"}, {"Corsican", "co"}, {"Cree", "cr"}, {"Croatian", "hr"},
{"Czech", "cs"}, {"Danish", "da"}, {"Divehi", "dv"}, {"Dutch", "nl"},
{"Dzongkha", "dz"}, {"English", "en"}, {"Esperanto", "eo"}, {"Estonian", "et"},
{"Ewe", "ee"}, {"Faroese", "fo"}, {"Fijian", "fj"}, {"Finnish", "fi"},
{"French", "fr"}, {"Fulah", "ff"}, {"Galician", "gl"}, {"Ganda", "lg"},
{"Georgian", "ka"}, {"German", "de"}, {"Greek", "el"}, {"Guaran", "gn"},
{"Gujarati", "gu"}, {"Haitian", "ht"}, {"Hausa", "ha"}, {"Hebrew", "he"},
{"Herero", "hz"}, {"Hindi", "hi"}, {"Hiri Motu", "ho"}, {"Hungarian", "hu"},
{"Icelandic", "is"}, {"Ido", "io"}, {"Igbo", "ig"}, {"Indonesian", "id"},
{"Interlingue", "ie"}, {"Inuktitut", "iu"}, {"Inupiaq", "ik"}, {"Irish", "ga"},
{"Italian", "it"}, {"Japanese", "ja"}, {"Javanese", "jv"}, {"Kalaallisut", "kl"},
{"Kannada", "kn"}, {"Kanuri", "kr"}, {"Kashmiri", "ks"}, {"Kazakh", "kk"},
{"Khmer", "km"}, {"Kikuyu", "ki"}, {"Kinyarwanda", "rw"}, {"Kirghiz", "ky"},
{"Kirundi", "rn"}, {"Komi", "kv"}, {"Kongo", "kg"}, {"Korean", "ko"},
{"Kurdish", "ku"}, {"Kwanyama", "kj"}, {"Lao", "lo"}, {"Latin", "la"},
{"Latvian", "lv"}, {"Limburgish", "li"}, {"Lingala", "ln"}, {"Lithuanian", "lt"},
{"Luxembourgish", "lb"}, {"Macedonian", "mk"}, {"Malagasy", "mg"}, {"Malay", "ms"},
{"Malayalam", "ml"}, {"Maltese", "mt"}, {"Manx", "gv"}, {"Marathi", "mr"},
{"Marshallese", "mh"}, {"Moldavian", "mo"}, {"Mongolian", "mn"}, {"Nauru", "na"},
{"Navajo", "nv"}, {"Ndonga", "ng"}, {"Nepali", "ne"}, {"North Ndebele", "nd"},
{"Northern Sami", "se"}, {"Norwegian", "no"}, {"Norwegian Bokml", "nb"}, {"Norwegian Nynorsk", "nn"},
{"Occitan", "oc"}, {"Ojibwa", "oj"}, {"Oriya", "or"}, {"Oromo", "om"},
{"Ossetian", "os"}, {"Panjabi", "pa"}, {"Pashto", "ps"}, {"Persian", "fa"},
{"Polish", "pl"}, {"Portuguese", "pt"}, {"Quechua", "qu"}, {"Raeto-Romance", "rm"},
{"Romanian", "ro"}, {"Russian", "ru"}, {"Samoan", "sm"}, {"Sango", "sg"},
{"Sanskrit", "sa"}, {"Sardinian", "sc"}, {"Scottish Gaelic", "gd"}, {"Serbian", "sr"},
{"Serbo-Croatian", "sh"}, {"Shona", "sn"}, {"Sichuan Yi", "ii"}, {"Sindhi", "sd"},
{"Sinhala", "si"}, {"Slovak", "sk"}, {"Slovenian", "sl"}, {"Somali", "so"},
{"South Ndebele", "nr"}, {"Southern Sotho", "st"}, {"Spanish", "es"}, {"Sundanese", "su"},
{"Swahili", "sw"}, {"Swati", "ss"}, {"Swedish", "sv"}, {"Tagalog", "tl"},
{"Tahitian", "ty"}, {"Tajik", "tg"}, {"Tamil", "ta"}, {"Tatar", "tt"},
{"Telugu", "te"}, {"Thai", "th"}, {"Tibetan", "bo"}, {"Tigrinya", "ti"},
{"Tonga", "to"}, {"Tsonga", "ts"}, {"Tswana", "tn"}, {"Turkish", "tr"},
{"Turkmen", "tk"}, {"Twi", "tw"}, {"Uighur", "ug"}, {"Ukrainian", "uk"},
{"Urdu", "ur"}, {"Uzbek", "uz"}, {"Venda", "ve"}, {"Vietnamese", "vi"},
{"Volapk", "vo"}, {"Walloon", "wa"}, {"Welsh", "cy"}, {"Western Frisian", "fy"},
{"Wolof", "wo"}, {"Xhosa", "xh"}, {"Yiddish", "yi"}, {"Yoruba", "yo"},
{"Zhuang", "za"}, {"Zulu", "zu"}
};
for (int LangPlaceholderIndex = 0; LangPlaceholderIndex < LangPlaceholderCodes.GetLength(0); LangPlaceholderIndex++)
{
// Builds exactly the pattern/replacement pair the hand-written calls used, e.g.
// @"(?i){{lang-Abkhazian@@\|" -> "{{lang-ab|". The trailing "@@" keeps a short name such as
// "Malay" from matching inside "Malayalam".
VisibleFixes += RegexReplace(ref ArticleText, @"(?i){{lang-" + LangPlaceholderCodes[LangPlaceholderIndex, 0] + @"@@\|", "{{lang-" + LangPlaceholderCodes[LangPlaceholderIndex, 1] + "|");
}
VisibleFixes += RegexReplace(ref ArticleText, @"([^'])('')((?:\(|;\s*){{lang-\w\w\|.*?}}\))\2([^'])", "$1$3$4"); //removes outside italics
VisibleFixes += RegexReplace(ref ArticleText, @"([^'])(''')((?:\(|;\s*){{lang-\w\w\|)([^']+?)(}}\))\2([^'])", "$1$3$2$4$2$5$6"); //moves bold inside
NonCountedFixes += RegexReplace(ref ArticleText, @" }};", " }}", false); // Fix where control character matches on ; within Anbsp; HTML tag
NonCountedFixes += RegexReplace(ref ArticleText, @"(\(\{\{lang-([a-z][a-z])\|[^{}\|<>""]+}},?\s+(?:or|also|also\s+called|singular:)\s+)(''+[^{}\|<>'""]+''+)\s*\)", "$1{{lang|$2|$3}})"); // ({{lang-aa|word}} or ''word'') --> ({{lang-aa|word}} or {{lang|ar|''word''}})
SpecificFixes = VisibleFixes - VisibleFixesCheckpoint;
Summary += String.Format("apply [[Template:Lang]] ({0}), ", SpecificFixes);
}
}
else // REPORT_MODE: change nothing here, just write what LanguageTagReport finds (the text in ([[Foreign language|Foreign]]: ''text'')) to the log file so it can be reviewed for whether it's English or not
{
string LangTagReport = LanguageTagReport(ArticleText);
// an empty report means there is nothing to log for this article
if(LangTagReport.Length != 0)
{
LogToFile("[[" + ArticleTitle + "]]" + "@@@" + LangTagReport);
ArticleLogged = true;
}
}
// do some stuff in report mode only, TODO under construction
if(REPORT_MODE || APPLY_AMBIGUOUS_FIXES)
{
ArticleTextBeforeReportMode = ArticleText;
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\w+\s\w+\s\w+\s\w+\s*)=\s*\|", "$1|"); // remove = from end of field
// NOTE(review): as written this rule can never match. In the second group the '^' sits outside the
// character class, so it is a start-of-input anchor (no multiline flag is set) rather than a negation,
// and group 1 has already consumed "{{cit...|" by the time it is tested. The intent looks like
// [^={}]*? ("no '=' before the next bar") - TODO confirm. Do not simply move the '^' inside the brackets:
// that form would also match the bar inside a piped wikilink such as [[target|label]] in a field value
// and insert "unused_text=" into the link.
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|)(^[={}]*?\|)", "$1unused_text=$2"); // cite fields without a field= entry between two bars
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(<ref)>(\s+name\s*=\s*[^<>{}\/]+?/?>)", "$1$2"); // <ref> name = "Fred">, TODO scan & test
if(!Regex.IsMatch(ArticleText, @"(?si)\{\{\s*cite\s+(map|sm|manual)\b\s*\|"))
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|\s*\w{2,}\s*=\s*[^{}\|<>]+?)[,\\/\+]?(\s(?-i)[a-z\d]{2,}\s*=)", "$1|$2"); // cite fields with no | between fields, |, matches lowercase second field name only, TODO scan & test,
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|\s*url\s*)\|(\s*http://)", "$1=$2"); // {{cite web url|http://, TODO scan & test
// trial fixes section
// cite...title=[http://url description] --> title=description | url=http://url, TODO scan & test
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|\s*title\s*=\s*)\[+\s*(http://[^\[\]<>""\s]+)\s+([^{}\|<>]+?)\]+(\s*(?:\||\}\}))" , "$1$3|url=$2$4");
// cite title=http://url --> title=null | url=http://url
VisibleFixes += RegexReplace(ref ArticleText, @"(?si)(\{\{\s*cit[^{}]*\|\s*title\s*=\s*)\[*\s*(http://[^\[\]<>""\s]+)\s*\]*(\s*(?:\||\}\}))", "$1|url=$2$3");
// end trial fixes
if (!ArticleTextBeforeReportMode.Equals(ArticleText))
{
LogToFile("[[" + ArticleTitle + "]]" + "@@@" + "Ambiguous fixes applied");
ArticleLogged = true;
}
}
// Rename ==Links== to ==External links==
VisibleFixes += RegexReplace(ref ArticleText, @"(?sim)(==+\s*)Links(\s*==+\s*(?:^(?:\*|\d\.?)?\s*\[?\s*http://))", "$1External links$2", false);
// articles with references but no {{reflist...}} to display them
if(!Regex.IsMatch(ArticleText, @"(?si)(\{\{\s*ref(?:-?list|erence)[^{}]*\}\}|<references\s*/>|\{\{refs)") && Regex.IsMatch(ArticleText, @"</ref>"))
{
VisibleFixes++;
Summary += @"add {{reflist}} ";
if(Regex.IsMatch(ArticleText, @"(?i)==\s*'*\s*References?\s*'*\s*=="))
ArticleText = Regex.Replace(ArticleText, @"(?i)(==+\s*'*\s*References?\s*'*\s*==+)", "$1\r\n{{Reflist}}<!--added under references heading by script-assisted edit-->");
else
{
//now try to move just above external links
if(Regex.IsMatch(ArticleText, @"(?im)(^\s*=+\s*(?:External\s+link|Source|Web\s*link)s?\s*=)"))
{
ArticleText += "\r\n==References==\r\n{{Reflist}}<!--added above External links/Sources by script-assisted edit-->";
ArticleText = Regex.Replace(ArticleText, @"(?sim)(^\s*=+\s*(?:External\s+link|Source|Web\s*link)s?\s*=+.*?)(\r\n==+References==+\r\n{{Reflist}}<!--added above External links/Sources by script-assisted edit-->)", "$2\r\n$1");
Summary += "above External links/Sources, ";
}
else
{ // now try to move just above categories
if(Regex.IsMatch(ArticleText, @"(?im)(^\s*\[\[\s*Category\s*:)"))
{
ArticleText += "\r\n==References==\r\n{{Reflist}}<!--added above categories/infobox footers by script-assisted edit-->";
ArticleText = Regex.Replace(ArticleText, @"(?sim)((?:^\{\{[^{}]+?\}\}\s*)*)(^\s*\[\[\s*Category\s*:.*?)(\r\n==+References==+\r\n{{Reflist}}<!--added above categories/infobox footers by script-assisted edit-->)", "$3\r\n$1$2");
Summary += "above categories/infobox footers, ";
}
else // now move above templated boxes at bottom and set to report mode
{
ArticleText += "\r\n==References==\r\n{{Reflist}}<!--added to end of article by script-assisted edit-->";
ArticleText = Regex.Replace(ArticleText, @"(?sim)(^==.*?)(^\{\{[^{}]+?\}\}.*?)(\r\n==+References==+\r\n{{Reflist}}<!--added to end of article by script-assisted edit-->)", "$1\r\n$3\r\n$2");
Summary += "to end of article, ";
if(!APPLY_AMBIGUOUS_FIXES)
REPORT_MODE = true; // positioning could be suboptimal
}
}
}
}
ArticleText = ht.AddBack(ArticleText);
// if in report mode log articles with a visible change, except those already logged by langtags report mode
if(REPORT_MODE && VisibleFixes > 0 && !ArticleLogged)
{
if(Summary.Equals(""))
Summary = "no specific summary";
LogToFile("[[" + ArticleTitle + "]]" + "@@@" + Summary);
}
if ((VisibleFixes == 0 && !FIXING_TYPOS) || REPORT_MODE)
{
Skip = true;
Summary = "";
return(OriginalArticleText);
}
Summary = String.Format("({0}) " + Summary, VisibleFixes);
return(ArticleText);
}
// method to perform looped find & replace on given input string
// Repeatedly applies a regex find & replace to ArticleTextLocal until a pass
// leaves the text unchanged (or a safety limit of passes is reached).
//   ArticleTextLocal: text to process; updated in place with the result
//   Find / Replace:   regex pattern and replacement pattern handed to Regex.Replace
//   VisibleFix:       when false the text is still updated but 0 is always returned
// Returns the number of matches found in the passes that actually changed the text,
// or 0 when the fix is not visible or the final text equals the input text.
public int LoopedRegexReplace(ref string ArticleTextLocal, string Find, string Replace, bool VisibleFix)
{
    const int MaxPasses = 20; // should NEVER get into infinite loop but limit iterations just in case
    string Original = ArticleTextLocal;
    int MatchCount = 0;
    for(int i = 0; i < MaxPasses; i++)
    {
        string BeforePass = ArticleTextLocal;
        int PassMatches = Regex.Matches(BeforePass, Find).Count;
        ArticleTextLocal = Regex.Replace(BeforePass, Find, Replace);
        // stop as soon as a pass is a no-op: every later pass would be a no-op too,
        // and its matches must not be counted as fixes
        if(BeforePass.Equals(ArticleTextLocal))
            break;
        MatchCount += PassMatches;
        // the replacements have led back to the input text: nothing was achieved, so stop
        if(Original.Equals(ArticleTextLocal))
            break;
    }
    if(VisibleFix && !Original.Equals(ArticleTextLocal))
        return(MatchCount);
    return(0);
}
// Convenience overload: looped find & replace where the fix is treated as visible,
// so the match count reported by the four-argument overload is passed straight back.
public int LoopedRegexReplace(ref string ArticleTextLocal, string Find, string Replace)
{
    int VisibleMatchCount = LoopedRegexReplace(ref ArticleTextLocal, Find, Replace, true);
    return VisibleMatchCount;
}
// method to work out the dominant date format in an article given article text, returns string with locale (also used in edit summary)
// Decides which date style dominates the given article text by counting
// "Month day" occurrences against "day Month" occurrences.
// Returns "Intl." only when day-first dates are strictly more frequent;
// a tie (including no dates at all) yields "US".
public string DetermineArticleDateLocale(string ArticleTextLocal)
{
    Regex MonthThenDay = new Regex(@"(January|February|March|April|May|June|July|August|September|October|November|December)\s*[0-3]?\d");
    Regex DayThenMonth = new Regex(@"[0-3]?\d\s*(January|February|March|April|May|June|July|August|September|October|November|December)");

    int MonthFirstHits = MonthThenDay.Matches(ArticleTextLocal).Count;
    int DayFirstHits = DayThenMonth.Matches(ArticleTextLocal).Count;

    // equal counts fall through to "US"
    return (DayFirstHits > MonthFirstHits) ? "Intl." : "US";
}
// method to add format=PDF to citations without it, masks then unmasks citations with field already set
// Adds a format=PDF field to citation templates whose url field ends in .PDF
// (case-insensitive) and which do not already carry a PDF format field.
// Citations that already have the field are temporarily masked ("{{" -> "#{{#")
// so the add step cannot match them, then unmasked again.
// ArticleTextLocal is updated in place; returns the number of url fields matched
// by the add step, or 0 when the text ends up unchanged.
public int AddPDFFormatField(ref string ArticleTextLocal)
{
    string Untouched = ArticleTextLocal;
    Regex PdfUrlField = new Regex(@"(?si)(\{\{\s*cit[^{}]*\|\s*url\s*=\s*[^\|}{]+?\.PDF\s*)(\||\}\})");

    // step 1: hide citations that already state a PDF format
    ArticleTextLocal = Regex.Replace(ArticleTextLocal, @"(?si)\{\{(\s*cit[^{}]*format\s*=[^}{]*PDF)", "#{{#$1");

    // step 2: count, then extend, the remaining citations with a .PDF url
    int Additions = PdfUrlField.Matches(ArticleTextLocal).Count;
    ArticleTextLocal = PdfUrlField.Replace(ArticleTextLocal, "$1|format=PDF$2");

    // step 3: restore the hidden citations
    ArticleTextLocal = Regex.Replace(ArticleTextLocal, @"#\{\{#", "{{");

    return Untouched.Equals(ArticleTextLocal) ? 0 : Additions;
}
/* class to carry out bracket fixes on external links
* the fixes may cause the regex engine to hang if a close (but not matching) string is in the article
* hence a separate class is required so that the fixes can be run on a separate thread
* and the thread cancelled after a couple of seconds
*/
public class BracketOnThread
{
    // each row: { regex pattern, replacement }, applied in this order by Do()
    private static readonly string[][] BracketFixes = new string[][]
    {
        // remove extra square brackets around external links 1/2
        new string[] { @"(?i)\[\[\s*(http://[^\]\[]{0,500}?(?:[^\]\[]{0,500}?(?:\[\[[^\]\[]*?\]\][^\]\[]*?)[^\]\[]*?){0,9})\]\]?([^\]\[])", "[$1]$2" },
        // ... where external link is within image description (do not 'hide images' for these to work) 1/2
        new string[] { @"(?i)(\[\[.*?)\[\s*(http://[^\]\[]{0,500}?(?:[^\]\[]{0,500}?(?:\[\[[^\]\[]{0,500}?\]\][^\]\[]{0,500}?)[^\]\[]{0,500}?)?)\]\]([^\]\[]{0,500}?\]\])", "$1[$2]$3" },
        // the next two (particularly) can cause the regex engine to hang on a non-matching close match:
        // remove extra square brackets around external links 2/2
        new string[] { @"(?i)\[\[?\s*(http://[^\]\[]{0,500}?(?:[^\]\[]{0,500}?(?:\[\[[^\]\[]{0,500}?\]\][^\]\[]{0,500}?)[^\]\[]{0,500}?){0,9})\]\]([^\]\[])", "[$1]$2" },
        // ... where external link is within image description (do not 'hide images' for these to work) 2/2
        new string[] { @"(?i)(\[\[.*?)\[\[\s*(http://[^\]\[]{0,500}?(?:[^\]\[]{0,500}?(?:\[\[[^\]\[]*?\]\][^\]\[]*?)[^\]\[]*?){0,9})\]\]?([^\]\[]*?\]\])", "$1[$2]$3" }
    };

    // working copy of the article text; rewritten after every individual fix
    private string ArticleTextLocal;
    // 1 once Do() has changed the text, otherwise 0
    private int VisibleFixes = 0;

    // Stores the text the bracket fixes will be applied to.
    public BracketOnThread(string ArticleTextLocalIn)
    {
        this.ArticleTextLocal = ArticleTextLocalIn;
    }

    // Current text: the constructor input until Do() has replaced it.
    public string getArticleText()
    {
        return this.ArticleTextLocal;
    }

    // 1 when Do() altered the text, 0 otherwise.
    public int getVisibleFixes()
    {
        return this.VisibleFixes;
    }

    // Applies the four bracket fixes in sequence, storing the text back after each one,
    // and flags a visible fix when the final text differs from the starting text.
    public void Do()
    {
        string TextAtStart = ArticleTextLocal;
        foreach(string[] Fix in BracketFixes)
        {
            ArticleTextLocal = Regex.Replace(ArticleTextLocal, Fix[0], Fix[1]);
        }
        if(!TextAtStart.Equals(ArticleTextLocal))
        {
            VisibleFixes = 1;
        }
    }
}
// method to extract text in ([[Foreign language|Foreign]]: ''text'') to edit summary to review whether it's English or not
// Extracts the text in ([[Foreign language|Foreign]]: ''text'') constructs so it can be
// logged and reviewed (e.g. to check whether the text is English or not).
//   ArticleTextLocal: article text to scan; only a local copy is modified here
// Returns "" when nothing matches, otherwise "@@@langtag@@@" followed by
// "<language>@@@<text>@@@" for every match.
public string LanguageTagReport(string ArticleTextLocal)
{
    // Language names recognised by both patterns below, kept in one place so the two
    // patterns cannot drift apart. Non-ASCII letters are written as \uXXXX regex escapes
    // (Guarani with i-acute, Bokmal with a-ring, Volapuk with u-umlaut) so they survive
    // any file encoding.
    const string LanguageNames = @"Abkhazian|Afar|Afrikaans|Akan|Albanian|Amharic|Arabic|Aragonese|Armenian|Assamese|Avaric|Avestan|Aymara|Azerbaijani|Bambara|Bashkir|Basque|Belarusian|Bengali|Bihari|Bislama|Bosnian|Breton|Bulgarian|Burmese|Catalan|Chamorro|Chechen|Chichewa|Chinese|Church Slavic|Chuvash|Cornish|Corsican|Cree|Croatian|Czech|Danish|Divehi|Dutch|Dzongkha|English|Esperanto|Estonian|Ewe|Faroese|Fijian|Finnish|French|Fulah|Galician|Ganda|Georgian|German|Greek|Guaran\u00ED|Gujarati|Haitian|Hausa|Hebrew|Herero|Hindi|Hiri Motu|Hungarian|Icelandic|Ido|Igbo|Indonesian|Interlingue|Inuktitut|Inupiaq|Irish|Italian|Japanese|Javanese|Kalaallisut|Kannada|Kanuri|Kashmiri|Kazakh|Khmer|Kikuyu|Kinyarwanda|Kirghiz|Kirundi|Komi|Kongo|Korean|Kurdish|Kwanyama|Lao|Latin|Latvian|Limburgish|Lingala|Lithuanian|Luxembourgish|Macedonian|Malagasy|Malay|Malayalam|Maltese|Manx|Marathi|Marshallese|Moldavian|Mongolian|Nauru|Navajo|Ndonga|Nepali|North Ndebele|Northern Sami|Norwegian|Norwegian Bokm\u00E5l|Norwegian Nynorsk|Occitan|Ojibwa|Oriya|Oromo|Ossetian|P[au]njabi|Pashto|Persian|Polish|Portuguese|Quechua|Raeto-Romance|Romanian|Russian|Samoan|Sango|Sanskrit|Sardinian|Scottish Gaelic|Serbian|Serbo-Croatian|Shona|Sichuan Yi|Sindhi|Sinhala|Slovak|Slovenian|Somali|South Ndebele|Southern Sotho|Spanish|Sundanese|Swahili|Swati|Swedish|Tagalog|Tahitian|Tajik|Tamil|Tatar|Telugu|Thai|Tibetan|Tigrinya|Tonga|Tsonga|Tswana|Turkish|Turkmen|Twi|Uighur|Ukrainian|Urdu|Uzbek|Venda|Vietnamese|Volap\u00FCk|Walloon|Welsh|Western Frisian|Wolof|Xhosa|Yiddish|Yoruba|Zhuang|Zulu";

    string ReturnString = "@@@langtag@@@";

    // Report pattern: group 2 is the language name, group 5 the quoted/bold text after the colon.
    // The pattern must stay on a single line: a line break inside the verbatim literal becomes
    // part of the regex and stops it matching "[[X language|X]]".
    Regex exp = new Regex(@"(\(|;\s*)\s*\[\[\s*(" + LanguageNames + @") language\s*\|\s*\2\s*\]\]\s*:+\s*((?:''')?)((?:''|"")?)([^{}\)\]\[;/""]*?(?:\([^{}\)\]\[;/""]+?\)[^{}\)\]\[;/""]*?)*)(?:(\3)\4\s*(\)|;|,\s*(?:abbreviated|singular|plural|from|transl|alternative|\bor\b|(?:sometimes\s+)?also|meaning|literally|died|born|[A-Z]+\b|\(?\d{3,4}))|\s*(\)|;|,\s*(?:abbreviated|singular|plural|from|transl|alternative|\bor\b|(?:sometimes\s+)?also|meaning|literally|died|born|January|February|March|April|May|June|July|August|September|October|November|December|[A-Z]+\b|\(?\d{3,4}))(\3)\4)", RegexOptions.IgnoreCase);

    // converts wikilinked language name to [[blah language|blah]] so the report pattern above can see it
    ArticleTextLocal = Regex.Replace(ArticleTextLocal, @"(\(|;\s*)\[\[(" + LanguageNames + @")\]\]\s*:+(\s*(?<ap>''|"")?(''')?([^'{}\)\]\[;/""]+?)\k<ap>?(\2)?\s*(\)|;|,\s*(?:abbreviated|singular|plural|from|transl|alternative|\bor\b|(?:sometimes\s+)?also|meaning|literally|died|born|\(?\d{3,4}))(\2)?(?:\k<ap>([^']))?)", "$1[[$2 language|$2]]:$3");

    MatchCollection MatchList = exp.Matches(ArticleTextLocal);
    if(MatchList.Count == 0)
        return("");

    foreach(Match TheMatch in MatchList)
    {
        ReturnString += TheMatch.Groups[2].Value + "@@@" + TheMatch.Groups[5].Value + "@@@";
    }
    return(ReturnString);
}
// method to perform given regex replace on article text, returning the number of matches found when the fix is visible and the text changed (otherwise 0); fixes assumed visible unless specified
// Runs a single regex find & replace over ArticleText (updated in place).
//   Find / Replace: regex pattern and replacement pattern
//   VisibleFix:     when false the replacement still happens but 0 is returned
// Returns the number of matches of Find in the input text, or 0 when the fix is
// not visible or the replacement left the text unchanged.
public int RegexReplace(ref string ArticleText, string Find, string Replace, bool VisibleFix)
{
    Regex Finder = new Regex(Find);
    string Unmodified = ArticleText;
    int Hits = Finder.Matches(Unmodified).Count;

    ArticleText = Finder.Replace(Unmodified, Replace);

    if(!VisibleFix || Unmodified.Equals(ArticleText))
    {
        return(0);
    }
    return(Hits);
}
// Convenience overload: single regex find & replace where the fix is treated as visible,
// so the count reported by the four-argument overload is passed straight back.
public int RegexReplace(ref string ArticleText, string Find, string Replace)
{
    int VisibleMatchCount = RegexReplace(ref ArticleText, Find, Replace, true);
    return VisibleMatchCount;
}
// Appends one line to "Module.log" (relative path): the given text, then "@@@",
// then the current local date/time.
public void LogToFile(string text)
{
    // 'using' guarantees the writer (and the file handle) is released even if WriteLine throws
    using(System.IO.StreamWriter writer = new System.IO.StreamWriter("Module.log", true)) // specifies append mode
    {
        writer.WriteLine(text + "@@@" + DateTime.Now);
    }
}
//