User:Ohconfucius/test/Sources.js: Difference between revisions
Appearance
Content deleted Content added
Ohconfucius (talk | contribs) further tweak to removing http links within publisher/journal/work fields |
Ohconfucius (talk | contribs) tweaks to increase lookaheads use |
||
Line 110: | Line 110: | ||
// rem misplaced punctuation |
// rem misplaced punctuation |
||
regex(/(<ref[^>]*>[^<]+?[\]\.\},;–]\s*\'\'[\w-]*(?: [\w-]*){0,3})(\.com|)([;,\.])(\'\')([^<]*?<\/ref>)/gi, '$1$2$4$3 |
regex(/(<ref[^>]*>[^<]+?[\]\.\},;–]\s*\'\'[\w-]*(?: [\w-]*){0,3})(\.com|)([;,\.])(\'\')(?=[^<]*?<\/ref>)/gi, '$1$2$4$3'); |
||
regex(/([\w]+)\.(['"]\])[ ]/gi, '$1$2. '); //LQ for titles |
regex(/([\w]+)\.(['"]\])[ ]/gi, '$1$2. '); //LQ for titles |
||
Line 117: | Line 117: | ||
//rem underlining within certain fields |
//rem underlining within certain fields |
||
txt.value=txt.value.replace(/(\|\s?(?:journal|newspaper|periodical|publisher|work)\s*\=\s*)<u>([^|}]*)<\/u>/gi, '$1'); |
txt.value=txt.value.replace(/(\|\s?(?:journal|newspaper|periodical|publisher|work)\s*\=\s*)<u>([^|}]*)<\/u>/gi, '$1$2'); |
||
//rem redundant top-level domains (.com, .net, .org), strip "www" |
//rem redundant top-level domains (.com, .net, .org), strip "www" |
||
Line 161: | Line 161: | ||
// regex(/(\[\[)(?:foo|bar)(\|)/gi, '$1foo bar \(dab\)$2'); |
// regex(/(\[\[)(?:foo|bar)(\|)/gi, '$1foo bar \(dab\)$2'); |
||
regex(/(\[\[)(?:(?:British|English|London) Sun|Sun on Sunday|The Scottish Sun|(?:The |)Sun (?:\((?:British |)newspaper\)|\(tabloid\)|\(UK newspaper\)|\(UK\)|Newspaper|on Sunday|Online)|Thesun\.co\.uk)(\|)/gi, '$1The Sun (United Kingdom) |
regex(/(\[\[)(?:(?:British|English|London) Sun|Sun on Sunday|The Scottish Sun|(?:The |)Sun (?:\((?:British |)newspaper\)|\(tabloid\)|\(UK newspaper\)|\(UK\)|Newspaper|on Sunday|Online)|Thesun\.co\.uk)(?=\|)/gi, '$1The Sun (United Kingdom)'); |
||
regex(/(\[\[)Daily Star \((?:British|UK)\)(\|)/gi, '$1Daily Star (United Kingdom) |
regex(/(\[\[)Daily Star \((?:British|UK)\)(?=\|)/gi, '$1Daily Star (United Kingdom)'); |
||
regex(/(\[\[Metro)(?: \(Associated Metro Limited\)| \(Associated Newspapers\)| \(London newspaper\)| \(free London newspaper\)| UK| newspaper London| newspaper UK)(\|)/gi, '$1 (British newspaper) |
regex(/(\[\[Metro)(?: \(Associated Metro Limited\)| \(Associated Newspapers\)| \(London newspaper\)| \(free London newspaper\)| UK| newspaper London| newspaper UK)(?=\|)/gi, '$1 (British newspaper)'); |
||
regex(/(\[\[)(?:Calcutta Telegraph|The Telegraph \((?:kolkatt?a|India)\)|(?:The |)Telegraph India|Telegraphindia\.com)(\|)/gi, '$1The Telegraph (Calcutta) |
regex(/(\[\[)(?:Calcutta Telegraph|The Telegraph \((?:kolkatt?a|India)\)|(?:The |)Telegraph India|Telegraphindia\.com)(?=\|)/gi, '$1The Telegraph (Calcutta)'); |
||
regex(/(\[\[)Dawn(?:, Karachi| newspaper|\.com| \((?:Newspaper|Pakistan)\))(\|)/gi, '$1Dawn (newspaper) |
regex(/(\[\[)Dawn(?:, Karachi| newspaper|\.com| \((?:Newspaper|Pakistan)\))(?=\|)/gi, '$1Dawn (newspaper)'); |
||
regex(/(\[\[The Pioneer)(?:, Karachi| newspaper| \((?:indian newspaper)\))(\|)/gi, '$1 (Indian newspaper) |
regex(/(\[\[The Pioneer)(?:, Karachi| newspaper| \((?:indian newspaper)\))(?=\|)/gi, '$1 (Indian newspaper)'); |
||
regex(/(\[\[)dailypioneer.com(\|)/gi, '$1The Pioneer (Indian newspaper) |
regex(/(\[\[)dailypioneer.com(?=\|)/gi, '$1The Pioneer (Indian newspaper)'); |
||
regex(/(\|)(Sport \()(newspaper\))(\]\])/g, '$1$2Spanish $3'); //dab moved December 2012 |
regex(/(\|)(Sport \()(newspaper\))(?=\||\]\])/g, '$1$2Spanish $3'); //dab moved December 2012 |
||
regex(/(=[ ]*\[\[)(?:[BE]SPN ?(?:USA|HD|Network|the ocho|\(United States\))|E.S.P.N.|(?:The |)Entertainment (?:and |)Sports Programming Network)(?:\|[\w, ]*)(\]\])/gi, '$1ESPN |
regex(/(=[ ]*\[\[)(?:[BE]SPN ?(?:USA|HD|Network|the ocho|\(United States\))|E.S.P.N.|(?:The |)Entertainment (?:and |)Sports Programming Network)(?:\|[\w, ]*)(?=\]\])/gi, '$1ESPN'); |
||
regex(/(?:agency|journal|newspaper|periodical|publisher|work)(\s?=\s?\[\[)(?:MTV (?:[A-Z]\w*|\([^\)\]]*\)))\|[^\)\]]*(\]\])/gi, 'publisher$1MTV |
regex(/(?:agency|journal|newspaper|periodical|publisher|work)(\s?=\s?\[\[)(?:MTV (?:[A-Z]\w*|\([^\)\]]*\)))\|[^\)\]]*(?=\]\])/gi, 'publisher$1MTV'); |
||
//unwinding of unnecessary pipes |
//unwinding of unnecessary pipes |
||
regex(/ |
regex(/\[\[Public Broadcasting Service\|(PBS\]\])/gi, '[[$1'); |
||
} |
} |
||
Line 281: | Line 281: | ||
regex(/(\|)(The Sun)(?: \((?:Hong Kong|Malaysia|Nigeria|United Kingdom)\))(\]\])/g, '$1$2$3'); |
regex(/(\|)(The Sun)(?: \((?:Hong Kong|Malaysia|Nigeria|United Kingdom)\))(\]\])/g, '$1$2$3'); |
||
regex(/(=[ ]*The Telegraph) \(Calcutta\)(?=\s*[|}])/g, '$1|location=Kolkota'); |
regex(/(=[ ]*The Telegraph) \(Calcutta\)(?=\s*[|}])/g, '$1|location=Kolkota'); |
||
regex(/(''The Telegraph) \((Calcutta)\)('')/g, '$1$3 ( |
regex(/(''The Telegraph) \((Calcutta)\)('')/g, '$1$3 ($2)'); |
||
regex(/(\|)(The Telegraph)(?: \(Calcutta\))(\]\])/g, '$1$2$3'); |
regex(/(\|)(The Telegraph)(?: \(Calcutta\))(\]\])/g, '$1$2$3'); |
||
regex(/(=[ ]*The Daily Telegraph) \(Australia\)(?=\s*[|}])/g, '$1|location=Australia'); |
regex(/(=[ ]*The Daily Telegraph) \(Australia\)(?=\s*[|}])/g, '$1|location=Australia'); |
Revision as of 15:55, 12 April 2014
// *********************************************************************************************
//This is a test (non-production) script, and may have untested errors. Please exercise due care should you decide to use it.
// *********************************************************************************************
/*************
*** Regex menu framework
*** by [[m:user:Pathoschild]] <http://meta.wikimedia.org/wiki/User:Pathoschild/Scripts/Regex_menu_framework>
*** - adds a sidebar menu of user-defined scripts.
*************/
importScriptURI('//meta.wikimedia.org/w/index.php?title=User:Pathoschild/Scripts/Regex_menu_framework.js&action=raw&ctype=text/javascript');
importScript("User:Ohconfucius/script/MOSNUM_utils.js"); //needed for "'Accessed' -> 'Retrieved'"
importScript("User:Ohconfucius/test/Sources_subscript1.js"); //convert domain names into article names
importScript("User:Ohconfucius/test/Sources_subscript2.js"); //Correctly casing titles and apply or rem italicisation
importScript("User:Ohconfucius/test/Sources_subscript3.js"); //link-fixing, dabbing etc
importScript("User:Ohconfucius/script/foreigndates.js"); //link-fixing, dabbing etc
/**
 * Unwraps labelled external links in the edit box, keeping only the label.
 *
 * A bracketed link of the form "[http(s)://address label]" is reduced to
 * "label" when:
 *  - the character before the bracket (optionally separated by one space)
 *    is neither ">" nor "*", and
 *  - the closing bracket is not followed by optional spaces and then a
 *    newline or hyphen.
 * The label must start with an apostrophe or a word character.
 */
function Ohc_linkspam() {
	var editBox = document.editform.wpTextbox1;
	var labelledExternalLink = /([^>\*][ ]?)\[https?:\/\/[^\s\[\]]*[ ]([\'\w\d][^\[\]]*)\](?![ ]*[\n\-]+)/gi;
	// $1 = the preceding character (plus optional space), $2 = the link label
	var unwrapped = editBox.value.replace(labelledExternalLink, '$1$2');
	editBox.value = unwrapped;
}
/**
 * Strips URLs that do not belong where they were placed in the edit box text.
 *
 * Two groups of rules run in a fixed order:
 *  1. URLs inside author/publisher/work-type citation fields are reduced to a
 *     bare name, or removed.
 *  2. References and url= parameters that point at Wikipedia, Facebook or
 *     Myspace (and Twitter, for the first rule only) are replaced with {{cn}}
 *     or removed; bracketed Wikipedia links become wikilinks; a 12-13
 *     character "#" suffix is trimmed from links.
 *
 * Uses the global regex() helper for some rules and direct replacement on the
 * edit box for the others, exactly as before.
 */
function Ohc_remove_urls() {
	var editBox = document.editform.wpTextbox1;

	// Applies one replacement directly to the edit box contents.
	function swap(pattern, replacement) {
		editBox.value = editBox.value.replace(pattern, replacement);
	}

	// --- http links within publisher/journal/work fields ---
	// full URL in the field -> keep only the second-level domain label
	regex(/((?:author|publisher|work) *= *)(?:https?:|ftp:)\/{2}(?:\w{2,4}\.|)(\w+)\.(?:com?|net|org|gov)(?:\.\w{2}|)(?:\/[^|}]*|)(?=\s*[\]|}])/gi, '$1$2');
	// drop a leading (scheme +) "www." so only the domain name is left
	swap(/(\|\s?(?:author(?:link\d?|)|journal|newspaper|publisher|website|work)\s*\=\s*)(?:https?:\/\/|)www\.(\w)/gi, '$1$2');
	// bracketed external link in a work-type field -> keep the label
	swap(/(\|\s?(?:newspaper|work|journal|publisher)\s*\=\s*)\[https?:\/\/[^\s\]]*\s([\w][^\]]*)\]/gi, '$1$2');
	// same for author / authorlink fields
	swap(/(\|\s?author(?:link\d?|)\s*\=\s*)\[https?:\/\/[^\s\]]*\s([\w][^\]]*)\]/gi, '$1$2');
	// a www address as the author value is removed outright (not a WL)
	swap(/(\|\s?author(?:link\d?|)\s*\=\s*)(?:https?:\/\/|)www\.[\w][^|}]*(?=[|}\n])/gi, '$1');

	// --- references to other WP articles and 'external' WP links ---
	regex(/<ref[^<>]*>[^<>]*\|[ ]*url ?=https?:\/\/(?:\w{2}\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace|twitter)\.com)\/[^<>]*<\/ref>/gi, '{{cn}}');
	regex(/<ref>https?:\/\/(?:\w{2}\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace)\.com)\/[^\s\]<]*<\/ref>/gi, '{{cn}}');
	regex(/<ref>\[https?:\/\/(?:\w{2}\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace)\.com)\/[^\s\]]*[ ]+[\w\d][^\]]*\]<\/ref>/gi, '{{cn}}');
	regex(/\|[ ]*url[ ]*=[ ]*https?:\/\/(?:\w{2}\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace)\.com)[^\s\|\{\}<]*(?=[ ]*[|}])/gi, '');
	// bracketed link to a Wikipedia article -> wikilink built from the label
	regex(/[ ]\[https?:\/\/\w{2}\.wikipedia\.org\/wiki\/[^\s\]]*[ ]+([\w][^\]]*)\]/gi, ' [[$1]]');
	// rem link tracking
	regex(/(\|\s?url\s*\=\s*|\[)(https?:[^|{}#\s]+)#[A-Za-z0-9\.]{12,13}(?=[\s\[\]|{}<>])/gi, '$1$2');
}
/**
 * First clean-up pass over citation parameters in the edit box.
 *
 * Runs a fixed sequence of regex replacements (order matters: later rules see
 * the output of earlier ones). Rules are applied either through the global
 * regex() helper or directly on the edit box value; the date rules at the end
 * go through ohc_regex(), which is loaded from MOSNUM_utils.js.
 *
 * Fixes relative to the previous revision:
 *  - the country-list regex literal was broken across two lines (syntax error);
 *  - the "last=Reporter|first=..." rule had a stray "=" after its lookahead and
 *    could never match;
 *  - the small-caps template rule discarded the wrapped text instead of
 *    keeping it;
 *  - the "url=www." rule inserted "http//" (missing colon);
 *  - the corporate-designation rules looked ahead for the literal sequence
 *    "|}" instead of either character;
 *  - the "title duplicates publisher/work" rule back-referenced the parameter
 *    prefix instead of the title text, so it never fired;
 *  - "journal" in the reordering rule carried a stray leading space.
 */
function Ohc_sources_prep() {
	var txt=document.editform.wpTextbox1;

	// removing artefacts within fields
	regex(/(\|\s?author\s*\=\s*)(?:by |)(?:wire staff|(?:staff |)reporters?|)[ ]*(?=[|}\n])/gi, '');
	regex(/(\|\s?author\s*\=\s*)([A-Z][a-z]*(?: [A-Z][a-z]*)*) (?:wire staff|(?:staff |)reporters?)[ ]*(?=[|}\n])/gi, '$1$2');
	// drop "|last=Reporter|first=..." pairs (stray "=" after the lookahead removed)
	regex(/\|[ ]*last=(Reporter|staff)[ ]*\|[ ]*first=[^|\{\}]*(?=[\|{}])/gi, '');
	regex(/(\|\s?accessdate\s*\=\s*)(?:accessed|retrieved)(?: by| on|):?[ ]*(\d)/gi, '$1$2');
	regex(/(\|\s?volume\s*\=\s*)vol(?:ume|\.?)[ ]*(\d)/gi, '$1$2');
	regex(/(\|\s?pages?\s*\=\s*)(?:pages?|p[gp]?\.?)[ ]*(\d)/gi, '$1$2');

	//Remove COinS corrupting templates from CS1 citations
	// keep the text that was wrapped by the template ($2), drop only the template markup
	regex(/(\|\s?(?:authors?|first\d?|last\d?|publisher|work)\s*\=\s*(?:[^{}|]*|)){{(?:Sm|Aut|SC|Small[- ]caps|Sm?caps)\|([^{}|]*)}}(?=(?:[^{}|]*|)[|}])/gi, '$1$2');
	regex(/(\|\s?)\w+\=(url\s*\=\s*https?:\/\/)(?=[|}\n])/gi, '$1$2'); //common cs1 error
	// NOTE(review): the lookahead sits directly after "www.", so this only fires when the
	// value is exactly "www." - confirm whether the whole address was meant to be matched.
	regex(/(\|\s?url\s*\=)(www\.)(?=[|}\n])/gi, '$1http://$2'); //common cs1 error
	regex(/(\|\s*date\s*=\s*)(?:not? |non-|un)date[ds]?\s*(?=[|}\n])/gi, '$1n.d.'); //common cs1 error
	regex(/\{\{wikinews ?(|2|cat(?:egory)?|has|par2?|portal|table|-inline)(\|[^\}]+|)\}\}\s*/gi, '');
	regex(/(\*[ ]*|)\[\[n:[^\]]*\]\][^\r\n]*[\r\n]/gi, '');
	regex(/\*[ ]*\{\{(?:Facebook|Find a Grave|Myspace)\|([^}]*)\}\}[\n\r\s]*/gi, '');

	// removing inappropriately populated fields
	// regex(/(\|\s?at\s*\=\s*(?:pages? |)(?:[-–\d\s,;]*) ?)[^|}]+(?=[|}\n])/gi, '');

	//citation template fixes
	regex(/(\|\s?)published\s?=/gi, '$1publisher=');

	// rem copyright assertion
	regex(/(\|\s?publisher\s*\=\s*)(?:\[\[copyright(?:\|©|)\]\])\s?/gi, '$1');
	regex(/(\|\s?publisher\s*\=\s*)(?:©|copyright)\s?/gi, '$1');

	// misused 'date' parameter
	regex(/\|\s?date(\s?=\s?[12]\d{3}\s?[|}])/gi, '|year$1');

	// rem toggles and redundant quote marks
	txt.value=txt.value.replace(/(\|\s?(?:agency|author|newspaper|work|journal|publisher)\s*\=\s*)\'\'([^|}]+)\'\'(?=\s*[\}\|])/gi, '$1$2'); //without link
	txt.value=txt.value.replace(/(\|\s?(?:agency|author|newspaper|work|journal|publisher)\s*\=\s*)\'\'(\[\[(?:[^\|]+\||)[^\|\]]+\]\])\'\'(?=\s*[\}\|])/gi, '$1$2'); //with link
	txt.value=txt.value.replace(/(\|\s?title\s*\=\s*)\''([^\|\{\}]+)\''/gi, '$1$2'); //rem ' in titles
	txt.value=txt.value.replace(/(\|\s?publisher\s*\=\s*)\(([^\|\{\}]+)\)/gi, '$1$2'); //rem parenthetical publishers

	// reordering 'work' and 'publisher' (first run - see second run in cleanup function)
	// ("journal" no longer requires a leading space; the preceding \s? already allows one)
	regex(/(\|\s?publisher\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(\s?\|[^}<>]*|)(\|\s?(?:journal|newspaper|magazine|periodical|website|work)\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(?=[\s\.]*[|}])/g, '$3$1$2');

	// remove redundant parentheses and templates from dm and md dates (equivalents also exists in Mosnum script)
	regex(/(=[ ]*)\(([^()|{}])\)/gi, '$1$2');
	regex(/(date[ ]*=[ ]*)\{\{(?:Start|End) ?date\|(\d{4})\|(0?\d|1[012])\|([0-2]?\d|30|31)\}\}/gi, '$1$2-$3-$4'); //stripping start/end template notes inside "|date=" parameter

	// rem corporate designation
	// lookahead accepts either "|" or "}" after optional spaces (was the literal pair "|}")
	txt.value=txt.value.replace(/(\|\s?publisher\s*\=\s*[^\[|}]{1,40}), (?:Inc|LL[CP]|Ltd|PLC|SA)\.?(?=[ ]*[|}])/gi, '$1');
	txt.value=txt.value.replace(/(\|\s?publisher\s*\=\s*[^\[|}]{1,40}) (?:Inc|LL[CP]|Ltd|PLC|SA)\.?(?=[ ]*[|}])/gi, '$1');

	// rem unnecessary quote marks
	txt.value=txt.value.replace(/(\|\s?title\s*\=\s*)["“]([^\|]+)["”](?=\s?[|}])/gi, '$1$2');
	txt.value=txt.value.replace(/(\|\s?title\s*\=\s*)['‘]([^\|'’]+)['’](?=\s?[|}])/gi, '$1$2');

	// repl double 'in-title' quote marks with single quotes
	txt.value=txt.value.replace(/(\|\s?title\s*\=\s*[\w ]* )["“]((?:\w[\w]* )+(?:\w[\w]*))["”]([^\|]+|)(?=\s?[|}])/gi, '$1\'$2\'$3');

	// adjust for possibly incorrectly input title
	// \2 is the title text: fires when publisher/work repeats the title verbatim (bare or wikilinked).
	// The title must be non-empty so that two blank fields are not treated as a duplicate.
	regex(/(\|\s?title\s*\=\s*)([^\|\}<>]+)(\s?\|[^}<>]*|)\|\s?(publisher|work)\s*\=\s*(?:\2|\[\[\2\]\])(?=\s*[|}])/g, '$1ACTUAL ARTICLE TITLE BELONGS HERE! |$4=$2$3');
	regex(/(\|\s?title\s*\=\s*)(\w+\.com)(?=\s?[=|{}])/gi, '$1ACTUAL ARTICLE TITLE BELONGS HERE! |publisher=$2');

	// rem misplaced punctuation
	regex(/(<ref[^>]*>[^<]+?[\]\.\},;–]\s*\'\'[\w-]*(?: [\w-]*){0,3})(\.com|)([;,\.])(\'\')(?=[^<]*?<\/ref>)/gi, '$1$2$4$3');
	regex(/([\w]+)\.(['"]\])[ ]/gi, '$1$2. '); //LQ for titles

	// removing blank parameters
	regex(/(?:\|[ ]*(?:accessdate|agency|archive(?:date|url)|arxiv|asin|at|author(-?link|-mask|-name-separator|-separator|\d|\d-link|link\d?|)|bibcode|chapter|chapter-url|coauthors?|contribution(?:-url|)|date|deadurl|display-authors|doi|doi-inactive|doibroken|edition|editor(?:-first|-last|-link|\d|\d-first|\d-last|\d-link|)|(?:first|last)\d?|format|id|is[bs]n|issue|jfm|journal|jstor|language|lay(?:date|source|summary)|lccn|location|magazine|day|month|mr|newspaper|nopp|oclc|ol|origyear|osti|others|pages?|periodical|place|pm[cd]|pmid|postscript|publication(?:-date|-place)|publisher|quote|ref|rfc|separator|series|ssrn|trans_title|type|url|volume|work|year|zbl)[ ]*=[\s]*)(?=[\}\|])/gi, '');

	//rem underlining within certain fields
	txt.value=txt.value.replace(/(\|\s?(?:journal|newspaper|periodical|publisher|work)\s*\=\s*)<u>([^|}]*)<\/u>/gi, '$1$2');

	//rem redundant top-level domains (.com, .net, .org), strip "www"
	txt.value=txt.value.replace(/(\|\s?(?:journal|newspaper|periodical|publisher|work)\s*\=\s*)(\[\[[^\[\]\}]*\]\])\.(?:biz|com|net|org|co\.uk)(?=\s*[|}])/gi, '$1$2');

	//rem duplicated publishers in separate fields (pre)
	regex(/[‒–—―]+\s*([^|}]{3,})\s*(\|\s?(?:publisher|work)\s*\=\s*(?:\w+\.|))\1(?=\s?[|}])/gi, '$2$1');

	//'work' and its alias (pre)
	regex(/(\|[ ]*?newspaper[ ]*=[^\|}]*(?:\|[^\{\}]*|))(?:\|[ ]*?work[ ]*=[^|}]*)(?=\s?[|}])+/gi, '$1');

	//rem linking within 'location' field
	// (the country list must stay on a single line: a regex literal cannot contain a line break)
	regex(/(\|[ ]*?(?:location|place)=[ ]*?)\[\[ ?(Abkhazia|Afghanistan|Albania|Algeria|Andorra|Angola|Antigua and Barbuda|Argentina|Armenia|Australia|Austria|Azerbaijan|(?:The |)Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belgium|Belize|Benin|Bhutan|Bolivia|Bosnia and Herzegovina|Botswana|Brazil|Brunei|Bulgaria|Burkina Faso|Burma|Burundi|Cambodia|Cameroon|Canada|Cape Verde|Central African Republic|Chad|Chile|(?:(?:People's |)Republic of |)China|Colombia|Comoros|(?:Democratic |)Republic of (?:the |)Congo|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Cyprus|Czech Republic|(?:Kingdom of |)Denmark|Djibouti|Dominica|Dominican Republic|East Timor|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Fiji|Finland|France|Gabon|Georgia \(country\)|Germany|Ghana|Greece|Greenland|Grenada|Guatemala|Guinea|Guinea-Bissau|Guyana|Haiti|Honduras|Hungary|Iceland|India|Indonesia|Iran|Iraq|(?:Republic of |)Ireland|Israel|Italy|Jamaica|Japan|Jordan|Kazakhstan|Kenya|Kiribati|North Korea|South Korea|Kosovo|Kuwait|Kyrgyzstan|Laos|Latvia|Lebanon|Lesotho|Liberia|Libya|Liechtenstein|Lithuania|Luxembourg|(?:Republic of |)Macedonia|Madagascar|Malawi|Malaysia|Maldives|Mali|Malta|Marshall Islands|Mauritania|Mauritius|Myanmar|M[ée]xico|(?:Federated States of |)Micronesia|Moldova|Monaco|Mongolia|Montenegro|Morocco|Mozambique|Nagorno-Karabakh|Namibia|Nauru|Nepal|(?:Kingdom of the |)Netherlands|Holland|New Zealand|Nicaragua|Niger|Nigeria|Northern Cyprus|Norway|Oman|Pakistan|Palau|Palestine|Panama|Papua New Guinea|Paraguay|Peru|Philippines|Poland|Portugal|Qatar|Romania|Russia|Rwanda|SADR|Saint Kitts and Nevis|Saint Lucia|Saint Vincent and the Grenadines|Samoa|San Marino|São Tomé and Príncipe|Saudi Arabia|Senegal|Serbia|Seychelles|Sierra Leone|Singapore|Slovakia|Slovenia|Solomon Islands|Somalia|Somaliland|South Africa|South Ossetia|Spain|Sri Lanka|Sudan|Suriname|Swaziland|Sweden|Switzerland|Syria|Taiwan|Tajikistan|Tanzania|Thailand|Timor Leste|(?:The |)Gambia|Togo|Tonga|Transnistria|Trinidad and Tobago|Tunisia|Turkey|Turkmenistan|Tuvalu|Uganda|Ukraine|United Arab Emirates|United Kingdom|United States|Uruguay|Uzbekistan|Vanuatu|Vatican City|Venezuela|Vietnam|Yemen|Zambia|Zimbabwe)[ ]?\|[ ]?(?:\w{2,3})\]\]/gi, '$1$2');
	regex(/(\|[ ]*?(?:location|place)=[ ]*?)\[\[(?:[^\|\]]+?\||)([\w\s\,]+?)\]\](?:(,? )\[\[(?:[^\|\]]+?\||)([\w\s\,]+?)\]\])(?:(,? )\[\[(?:[^\|\]]+?\||)([\w\s\,]+?)\]\]|)(?=[ ]?[|}])/gi, '$1$2$3$4$5$6');
	regex(/(\|[ ]*?(?:location|place)=[ ]*?)\[\[(?:[^\|\]]+?\||)([\w\s\,]+?)\]\](?:(,? )\[\[(?:[^\|\]]+?\||)([\w\s\,]+?)\]\]|)(?=[ ]?[|}])/gi, '$1$2$3$4');

	// removing english icon template
	regex(/[ ]?\{\{en[- ]icon\}\}/gi, "");

	// removing icon template from within "|language=" parameter
	regex(/(\|[ ]*?language[ ]*?=[ ]*?)\{\{(\w{2})(?:[- ]icon|)\}\}/gi, "$1$2");

	// eliminating time of day
	regex(/(\|[ ]*author[ ]*=[ ]*)(?:posted|published)(?: by| on|)[\s:](?=\s*\w)/gi, "$1");
	regex(/(\|[ ]*(?:date|archivedate|accessdate|author)[ ]*=[ ]*)[0-2]?\d:[0-5]\d(?:[ ]| )(?:[ap]m ?|[ap]\.m\. |[A-Z]{1,2}T|UTC)[\.,]?[ ]?/gi, "$1");

	// eliminating days of the week
	regex(/(\|[ ]*(?:date|archivedate|accessdate|author)[ ]*=[ ]*)(?:(?:Mon|Tues|Wednes|Thurs|Fri|Satur|Sun)day,?)\s/gi, "$1");
	regex(/(\|[ ]*(?:date|archivedate|accessdate|author)[ ]*=[ ]*)(?:(?:Mon|Tues?|Wed|Thur?|Fri|Sat|Sun)[\.,]?)\s/gi, "$1");

	//'Accessed' -> 'Retrieved'
	ohc_regex(/(?:[\.,;][ ]*(?:url |link |last |)(?:Retrieved|Accessed))(?: on(?:line|)|):? (@month|@dd|@yyyy)(?=\D)/gi, '. Retrieved $1');
	ohc_regex(/(\w|\])(?:[ ]*(?:url |link |last |)(?:Retrieved|Accessed))(?: on(?:line|)|):? (@month|@dd|@yyyy)(?=\D)/gi, '$1. Retrieved $2');
	ohc_regex(/(?:Retrieved|Accessed)(?: on(?:line|)|):? (@Month\s@DD,?\s@YYYY|@DD\s@Month\s@YYYY|@yyyy-@mm-@dd)(?=\D)/gi, 'Retrieved $1');
	ohc_regex(/(\w|\])[\.,;]?[ ]\((Retrieved (?:@Month\s@DD,\s@YYYY|@DD\s@Month\s@YYYY|@yyyy-@mm-@dd))\)/gi, '$1. $2');
}
/**
 * Points piped wikilinks for a handful of news sources at their
 * disambiguated article titles.
 *
 * Most rules rewrite only the link target that precedes a "|" (matched with a
 * lookahead), so the visible label is left untouched. The ESPN and MTV rules
 * additionally drop the pipe and label, and the final rule unwinds the pipe in
 * [[Public Broadcasting Service|PBS]].
 *
 * All replacements go through the global regex() helper.
 *
 * Change from the previous revision: the dots in "dailypioneer.com" and
 * "E.S.P.N." are now escaped so they match a literal "." rather than any
 * character. The unused edit-box local was removed.
 */
function Ohc_dab_news_sources() {
	//pre-dab of piped sources
	// regex(/(\[\[)(?:foo|bar)(\|)/gi, '$1foo bar \(dab\)$2');
	regex(/(\[\[)(?:(?:British|English|London) Sun|Sun on Sunday|The Scottish Sun|(?:The |)Sun (?:\((?:British |)newspaper\)|\(tabloid\)|\(UK newspaper\)|\(UK\)|Newspaper|on Sunday|Online)|Thesun\.co\.uk)(?=\|)/gi, '$1The Sun (United Kingdom)');
	regex(/(\[\[)Daily Star \((?:British|UK)\)(?=\|)/gi, '$1Daily Star (United Kingdom)');
	regex(/(\[\[Metro)(?: \(Associated Metro Limited\)| \(Associated Newspapers\)| \(London newspaper\)| \(free London newspaper\)| UK| newspaper London| newspaper UK)(?=\|)/gi, '$1 (British newspaper)');
	regex(/(\[\[)(?:Calcutta Telegraph|The Telegraph \((?:kolkatt?a|India)\)|(?:The |)Telegraph India|Telegraphindia\.com)(?=\|)/gi, '$1The Telegraph (Calcutta)');
	regex(/(\[\[)Dawn(?:, Karachi| newspaper|\.com| \((?:Newspaper|Pakistan)\))(?=\|)/gi, '$1Dawn (newspaper)');
	regex(/(\[\[The Pioneer)(?:, Karachi| newspaper| \((?:indian newspaper)\))(?=\|)/gi, '$1 (Indian newspaper)');
	// literal dot: only the actual domain name is rewritten
	regex(/(\[\[)dailypioneer\.com(?=\|)/gi, '$1The Pioneer (Indian newspaper)');
	regex(/(\|)(Sport \()(newspaper\))(?=\||\]\])/g, '$1$2Spanish $3'); //dab moved December 2012
	// literal dots in the abbreviation; the pipe and label are dropped, leaving [[ESPN]]
	regex(/(=[ ]*\[\[)(?:[BE]SPN ?(?:USA|HD|Network|the ocho|\(United States\))|E\.S\.P\.N\.|(?:The |)Entertainment (?:and |)Sports Programming Network)(?:\|[\w, ]*)(?=\]\])/gi, '$1ESPN');
	// the parameter name is rewritten to "publisher" and the link reduced to [[MTV]]
	regex(/(?:agency|journal|newspaper|periodical|publisher|work)(\s?=\s?\[\[)(?:MTV (?:[A-Z]\w*|\([^\)\]]*\)))\|[^\)\]]*(?=\]\])/gi, 'publisher$1MTV');

	//unwinding of unnecessary pipes
	regex(/\[\[Public Broadcasting Service\|(PBS\]\])/gi, '[[$1');
}
/**
 * Removes publisher entries that add little next to the title of the work:
 * parent companies and publishing groups named in |publisher= (and, for a few
 * names, |work= / |newspaper=), in both plain and wikilinked form.
 *
 * All replacements go through the global regex() helper and run in order.
 *
 * Fixes relative to the previous revision:
 *  - "publisher=MTV|publisher=Viacom" left a dangling "|=" behind; it now
 *    collapses to "|publisher=MTV";
 *  - "publisher=Turner Sports Interactive, Inc|publisher=NBA" was replaced by
 *    the bare text "NBA", losing the parameter name; it now becomes
 *    "|publisher=NBA", matching the sibling rule for the reverse order;
 *  - "Nielsen Media Research" required two spaces and so never matched
 *    (plain and linked variants).
 * The unused edit-box local was removed.
 */
function Ohc_publishers() {
	//linked publishing houses
	// removing publishers less well-known than their titles
	regex(/(\|\s?publisher\s?\=MTV)\|\s?publisher\s*\=\s*(?:MTV Networks|Viacom)/gi, '$1');
	regex(/(?:\|publisher=Turner Sports Interactive, Inc)\.? ?(\|publisher=NBA)(?= ?\|)/gi, '$1');
	regex(/(\|publisher=NBA) ?\|publisher=(?:Turner Sports Interactive, Inc)\.?(?= ?\|)/gi, '$1');
	regex(/\[\[Jann? Wenner\|Wenner Media\]\](?= ?\|)/gi, '');

	// removing publishers for periodicals
	regex(/\|publisher=\[\[(?:PMC \(company\)\||)(?:PMC|Penske Media Corporation)\]\](?=[\s\.]*[|}])/gi, '');
	regex(/(?:\|\s?(newspaper|work|publisher)\s*\=\s*\[?\[?(?:Hachette Filipacchi Médias\||)Hachette Filipacchi(?: \(UK\) Ltd.?| UK|)\]?\]?)(?=[\s\.]*[|}])/gi, '');
	regex(/(?:\|\s?(newspaper|work|publisher)\s*\=\s*(ACP Magazines|The Herald and Weekly Times|John Fairfax (and Sons Ltd\.?|Holdings)|Fairfax(?: Media(?: Limited|)| Digital| newspapers|)))(?=[\s\.]*[|}])/gi, '');
	regex(/(?:\|\s?(newspaper|work|publisher)\s*\=\s*\[\[(ACP Magazines|The Herald and Weekly Times|John Fairfax (and Sons Ltd\.?|Holdings)|Fairfax(?: Media(?: Limited|)| Digital| newspapers))\]\])(?=[\s\.]*[|}])/gi, '');
	regex(/(?:\|\s?publisher\s*\=\s*(Alexander Lebedev|American Media|Associated Newspapers|Cond[eé] Nast(?: Publications|)|Daily Mail and General Trust|Devin Laz[ae]rine|Dow Jones & Company|Future plc|(Guardian|Telegraph) Media Group|(?:Guardian|Independent) News (?:and|&) Media (?:Limited|Ltd\.|)|Hachette Filipacchi Médias|Hearst (?:Corporation|Magazines(?: UK|))|Herald Media|IGN Entertainment|Imdb Inc\.?|InterMedia Partners|IDG|IPC Media|Lee Enterprises|Media ?News Group|Mortimer Zuckerman|MTV Networks|News (?:Corporation|International|Limited)|Prometheus Global Media|Reed Business Information|Rovi Corporation|Trinity Mirror|Times Newspapers|Nielsen (?:Media Research|Business Media)|Viacom|Time(?: Warner ?|)))(,? Inc| LL[CP]| Ltd|Limited|)[\s\.]*(?=[|}\n])/gi, '');
	regex(/\|\s?publisher\s*\=\s*(?:The |)(?:Deseret News Publishing|Dispatch Printing|E. W. Scripps|Evening Post Publishing|Forbes(?: Publishing|, Inc\.)|Gannett?|Irish Times Trust|(?:Jann Wenner|Wenner Media)|Johnson Publishing|Journal Communications|Mac Publishing|Media24|McClatchy|Nash holdings LLC|New York Times|Seattle Times|Star Tribune|Thomp?son(?:[- ]?Reuters)?(?: Corporation| Plc.?|)|Torstar|Time Inc\.|Times (?:Group|Publishing)|Tribune|Vox Media|Washington Post|World Publishing|Ziff Davis Media)(?: Co(?:mpany|\.)?)?(?=[\s\.]*[|}])/g, '');
	regex(/\|\s?publisher\s*\=\s*(?:Cox|Halifax|North Jersey|Sun-Times|Tampa|Herald|Stephens|WEHCO|\w+) Media( Group(?:, Inc.)?)?(?=[\s\.]*[|}])/g, '');
	regex(/\|\s?publisher\s*\=\s*(?:\w+ )+(?:Communications|Media|Publishing|Publications)(?: Group(?:, Inc.)?)?(?=[\s\.]*[|}])/g, '');

	//duplicate above with links //("Corporation" excluded - false positive with Australian Broadcasting Corporation)
	regex(/(?:\|\s?publisher\s*\=\s*\[\[(Alexander Lebedev|American Media|Associated Newspapers|Cond[eé] Nast(?: Publications|)|Daily Mail and General Trust|Devin Laz[ae]rine|Dow Jones & Company|Future plc|(Guardian|Telegraph) Media Group|(?:Guardian|Independent) News (?:and|&) Media (?:Limited|Ltd\.|)|Hachette Filipacchi Médias|Hearst (?:Corporation|Magazines(?: UK|))|Herald Media|IGN Entertainment|Imdb Inc\.?|InterMedia Partners|IDG|IPC Media|Lee Enterprises|Media ?News Group|Mortimer Zuckerman|MTV Networks|News (?:Corporation|International|Limited)|Prometheus Global Media|Reed Business Information|Rovi Corporation|Trinity Mirror|Times Newspapers|Nielsen (?:Media Research|Business Media)|Viacom|Time(?: Warner ?|)))(,? Inc| LL[CP]| Ltd|Limited|)(?:\|[^\]\}]*|)\]\][\s\.]*(?=[|}\n])/gi, '');
	regex(/\|\s?publisher\s*\=\s*\[\[(?:The |)(?:Deseret News Publishing|Dispatch Printing|E. W. Scripps|Evening Post Publishing|Forbes(?: Publishing|, Inc\.)|Gannett?|Irish Times Trust|(?:Jann Wenner|Wenner Media)|Johnson Publishing|Journal Communications|Mac Publishing|Media24|McClatchy|Nash holdings LLC|New York Times|Seattle Times|Star Tribune|Thomp?son(?:[- ]?Reuters)?(?: Corporation| Plc.?|)|Torstar|Time Inc\.|Times Publishing|Tribune|Vox Media|Washington Post|World Publishing|Ziff Davis Media)(?: Co(?:mpany|\.)?)?\]\](?=[\s\.]*[|}])/g, '');
	regex(/\|\s?publisher\s*\=\s*\[\[(?:Cox|Halifax|North Jersey|Sun-Times|Tampa|Herald|Stephens|WEHCO|\w+) Media( Group(?:, Inc.)?)?\]\](?=[\s\.]*[|}])/g, '');
	regex(/\|\s?publisher\s*\=\s*\[\[(?:\w+ )+(?:Communications|Media|Publishing|Publications)( Group(?:, Inc.)?)?\]\](?=[\s\.]*[|}])/g, '');
}
function Ohc_sources_cleanup() {
var txt=document.editform.wpTextbox1;
// displacing location-dab (in parentheses)
regex(/( \|location=(?:New York|UK))(\]\])/gi, '$2$1');
// The following regexes for dab-links are in sets of four. If changing, please ensure all sets are changed
regex(/(=[ ]*(?:The ?|)[A-Z]\w*(?: [A-Z]\w*|)) \((South Africa)(?:n newspaper|)\)([ ]*[|}])/g, '$1|location=$2$3');
regex(/(''(?:The ?|)[A-Z]\w*(?: [A-Z]\w*|)) \((South Africa)(?:n newspaper|)\)(''[\.,;])/g, '$1$3$2');
regex(/(\[\[((?:The ?|)[A-Z]\w*(?: [A-Z]\w*|)))( \(South Africa(?:n newspaper|)\))(\]\][\.,;]?)/g, '$1$3|$2$4');
regex(/(\|)((?:The ?|)[A-Z]\w*(?: [A-Z]\w*|))(?: \((?:South Africa)(?:n newspaper|)\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*)(Billboard|Fast Company|Q|Time Out) \((?:magazine)\)(?=\s*[|}])/g, '$1$2'); //non-standard code
regex(/('')(Billboard|Fast Company|Q|Time Out) \(magazine\)(?='')/g, '$1$2'); //non-standard code
regex(/(\|)(Billboard|Fast Company|Q|Time Out)(?: \(magazine\))(?=\]\])/g, '$1$2');
regex(/(=[ ]*Daily News) \((New York)\)([ ]*[|}])/g, '$1|location=$2$3');
regex(/(''Daily News) \((New York)\)('')/g, '$1$3 ($2)');
regex(/(\|)(Daily News)(?: \((New York)\))(\]\])/g, '$1$2$4|location=$3');
regex(/(\[\[)(Daily News)( \((New York)\))(\]\])/g, '$1$2$3|$2$5|location=$4');
regex(/(=[ ]*Daily Record|=[ ]*Sunday Mail) \((Scotland)\)(\s*[|}])/g, '$1|location=$2$3');
regex(/(''Daily Record|''Sunday Mail) \((Scotland)\)('')/g, '$1$3 ($2)');
regex(/(\|)(Daily Record|Sunday Mail)(?: \((Scotland)\))(\]\])/g, '$1$2$4|location=$3');
regex(/(\[\[)(Daily Record|Sunday Mail)( \((Scotland)\))(\]\])/g, '$1$2$3|$2$5|location=$4');
regex(/(=[ ]*Dawn) \((newspaper)\)(\s*[|}])/g, '$1|location=Pakistan$3');
regex(/(''Dawn) \((newspaper)\)('')/g, '$1$3 (Pakistan)');
regex(/(\|)(Dawn)(?: \(newspaper\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*(?:Daily Star)) \((United Kingdom)\)(\s*[|}])/g, '$1|location=$2$3');
regex(/(''(?:Daily Star)) \((United Kingdom)\)('')/g, '$1$3 ($2)');
regex(/(\|)(Daily Star)(?: \(United Kingdom\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*El Mundo) \((Columbia|Spain)\)(\s*[|}])/g, '$1|location=$2$3');
regex(/(''El Mundo) \((Columbia|Spain)\)('')/g, '$1$3 ($2)');
regex(/(\|)(El Mundo)(?: \((?:Columbia|Spain)\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*The Daily Star) \((Lebanon)\)(\s*[|}])/g, '$1|location=$2$3');
regex(/(''The Daily Star) \((Lebanon)\)('')/g, '$1$3 ($2)');
regex(/(\|)(The Daily Star)(?: \(Lebanon\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*The Gazette) \((Montreal)\)(\s*[|}])/g, '$1|location=$2$3');
regex(/(''The Gazette) \((Montreal)\)('')/g, '$1$3 ($2)');
regex(/(\|)(The Gazette)(?: \(Montreal\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*The Herald) \((Glasgow)\)(\s*[|}])/g, '$1|location=$2$3');
regex(/(''The Herald) \((Glasgow)\)('')/g, '$1$3 ($2)');
regex(/(\|)(The Herald)(?: \(Glasgow\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*Metro) \((British newspaper)\)(\s*[|}])/g, '$1|location=UK$3');
regex(/(''Metro) \((British newspaper)\)('')/g, '$1$3 (UK)');
regex(/(\|)(Metro)(?: \(British newspaper\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*The Nation) \((Malawi|Nigeria|Pakistan|Thailand)\)(\s*[|}])/g, '$1|location=$2$3');
regex(/(''The Nation) \((Malawi|Nigeria|Pakistan|Thailand)\)('')/g, '$1$3 ($2)');
regex(/(\|)(The Nation)(?: \((?:Malawi|Nigeria|Pakistan|Thailand)\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*The National) \((Abu Dhabi)\)(\s*[|}])/g, '$1|location=$2$3');
regex(/(''The National) \((Abu Dhabi)\)('')/g, '$1$3 ($2)');
regex(/(\|)(The National)(?: \(Abu Dhabi\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*La Presse) \((Canadian newspaper)\)(\s*[|}])/g, '$1|location=Canada$3');
regex(/(''La Presse) \((Canadian newspaper)\)('')/g, '$1$3 (Canada)');
regex(/(\|)(La Presse)(?: \(Canadian newspaper\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*Les Échos) \((France)\)(\s*[|}])/g, '$1|location=$2$3');
regex(/(''Les Échos) \((France)\)('')/g, '$1$3 ($2)');
regex(/(\|)(Les Échos)(?: \(France\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*Panorama) \((Gibraltar)\)(\s*[|}])/g, '$1|location=$2$3');
regex(/(''Panorama) \((Gibraltar)\)('')/g, '$1$3 ($2)');
regex(/(\|)(Panorama)(?: \(Gibraltar\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*The Pioneer) \((Indian newspaper)\)(\s*[|}])/g, '$1|location=India$3');
regex(/(''The Pioneer) \((Indian newspaper)\)('')/g, '$1$3 (India)');
regex(/(\|)(The Pioneer)(?: \(Indian newspaper\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*Sporting Life) \((British newspaper)\)(\s*[|}])/g, '$1|location=UK$3');
regex(/(''Sporting Life) \((British newspaper)\)('')/g, '$1$3 (UK)');
regex(/(\|)(Sporting Life)(?: \(British newspaper\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*The Standard) \((Hong Kong|Kenya)\)(\s*[|}])/g, '$1|location=$2$3');
regex(/(''The Standard) \((Hong Kong|Kenya)\)('')/g, '$1$3 ($2)');
regex(/(\|)(The Standard)(?: \((?:Hong Kong|Kenya)\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*The Star) \((Malaysia)\)(\s*[|}])/g, '$1|location=$2$3');
regex(/(''The Star) \((Malaysia)\)('')/g, '$1$3 ($2)');
regex(/(\|)(The Star)(?: \(Malaysia\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*The Sun) \((Hong Kong|Malaysia|Nigeria|United Kingdom)\)(\s*[|}])/g, '$1|location=$2$3');
regex(/(''The Sun) \((Hong Kong|Malaysia|Nigeria|United Kingdom)\)('')/g, '$1$3 ($2)');
regex(/(\|)(The Sun)(?: \((?:Hong Kong|Malaysia|Nigeria|United Kingdom)\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*The Telegraph) \(Calcutta\)(?=\s*[|}])/g, '$1|location=Kolkota');
regex(/(''The Telegraph) \((Calcutta)\)('')/g, '$1$3 ($2)');
regex(/(\|)(The Telegraph)(?: \(Calcutta\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*The Daily Telegraph) \(Australia\)(?=\s*[|}])/g, '$1|location=Australia');
regex(/(''The Daily Telegraph) \((Australia)\)('')/g, '$1$3 (Australia)');
regex(/(\|)(The Daily Telegraph)(?: \(Australia\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*(?:The Times)) \((Malta)\)(\s*[|}])/g, '$1|location=$2$3');
regex(/(''(?:The Times)) \((Malta)\)('')/g, '$1$3 ($2)');
regex(/(\|)(The Times)(?: \(Malta\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*Vogue) \((British magazine)\)(\s*[|}])/g, '$1|location=UK$3');
regex(/(''Vogue) \((British magazine)\)('')/g, '$1$3 (UK)');
regex(/(\|)(Vogue)(?: \(British magazine\))(\]\])/g, '$1$2$3');
regex(/(= ?(?:ABC|Marca)) \((newspaper)\)([ ]*[|}])/g, '$1|location=Spain$3');
regex(/(''(?:ABC|Marca)) \((newspaper)\)('')/g, '$1$3 (Spain)');
regex(/(\|)(ABC|Marca)(?: \(newspaper\))(\]\])/g, '$1$2$3');
regex(/(= ?(?:Il Giorno)) \((newspaper)\)([ ]*[|}])/g, '$1|location=Italy$3');
regex(/(''(?:Il Giorno)) \((newspaper)\)('')/g, '$1$3 (Italy)');
regex(/(\|)(Il Giorno)(?: \(newspaper\))(\]\])/g, '$1$2$3');
regex(/(= ?(?:RT)) \((TV network)\)([ ]*[|}])/g, '$1|location=Russia$3');
regex(/(\|)(RT)(?: \(TV network\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*\[\[)([^\[\]\(\)\|:]*)( \([^\[\]\(\)\|]*\))(\]\])/gi, '$1$2$3|$2$4'); //adding piping to parentheticals inside parameters
regex(/(?:author|agency|publisher)(\s*\=\s*\[\[[^()|]+ \((?:newspaper|magazine)\)\|[^\[\]|]+\]\])/gi, 'work$1');
// removing redundancies
regex(/ – (?:Times of India|Rediff.com [\w]*)(?=[ ]?\|)/gi, '');
regex(/(?: +[‒–—―] *Times Of India|)(\]. +''The Times of India''\.)indiatimes\.com/gi, '$1');
regex(/(\w''\.)indiatimes\.com/gi, '$1');
regex(/\|[ ]?language[ ]?\=[ ]?English[ ]*?(?=[|}\n])/gi, ''); //note: adjusted for false positive in infoboxes books
regex(/- [\w]*\.com[ ]*\|/gi, '|');
regex(/(.) – Google [^ \]]*(\][\.,;]) Books\.google\.\w{2,3}(\.| )/gi, '$1$2Google Books$3');
regex(/(.) at Discogs(\][\.,;]) Discogs\.com(\.| )/gi, '$1$2Discogs$3');
regex(/\|\s?author\s?\=(?:posted|publishe[dr]|written)\s?(?:by|on)\s/gi, '|author=');
regex(/\|\s?(?:work|publisher)(\s?\=MTV)\|\s?publisher\s*\=\s*(?:MTV Networks|Viacom)/gi, '|publisher$1=');
regex(/\|\s?(?:publisher|work)\s*\=\s*(?:BBC|BBC News(?: Online|))\s*(\|[^}<>]*|)\|\s?publisher\s*\=\s*(?:BBC|BBC News(?: Online|)|British Broadcasting Corporation)(?=[\s\.]*[|}])/g, '|publisher=BBC News $1');
regex(/\|\s?(?:publisher|work)\s*\=\s*(BBC Sports?)\s*(\|[^}<>]*|)\|\s?publisher\s*\=\s*(?:BBC|BBC News(?: Online|)|British Broadcasting Corporation)(?=[\s\.]*[|}])/g, '|publisher=$1$2');
//rem duplicated publishers in separate fields (post); rem preceding nbsp
txt.value=txt.value.replace(/(?:[‒–—―]+||)\s*(?:The |)([^\|\}&]{3,})(?:\.com|)\s*(\|\s?(?:publisher|work)\s*\=\s*)\1(?=\s*[|}])/gi, '$2$1');
txt.value=txt.value.replace(/\s? \s?(\|\s?(?:publisher|work)\s*\=\s*)/gi, ' $1');
//per [[Help:Citation Style 1#Elements not included]]
regex(/(?:-[ ]*Google Books[ ]*(\|[^}]*|)|)\|\s?publisher\s*\=\s*Google Books(?=[\s\.]*[|}])/g, '');
regex(/\|\s?publisher\s*\=\s*(?:Project Gutenberg|Proquest|Scribd|web(?:\.archive\|citation).org)(?=[\s\.]*[|}])/g, '');
// reordering 'work' and 'publisher'; reordering 'work' and 'website'
regex(/(\|\s?publisher\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(\s?\|[^}<>]*|)(\|\s?(?: journal|newspaper|magazine|periodical|website|work)\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(?=[\s\.]*[|}])/g, '$3$1$2');
regex(/(\|\s?website\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(\s?\|[^}<>]*|)(\|\s?work\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(?=[\s\.]*[|}])/g, '$3$1$2');
// removing identical/similar entries in 'work' and 'publisher'
regex(/\|\s?work\s*\=\s*([^\|\}<>]*)(\s?\|[^}<>]*|)\|\s?(?:publisher|work)\s*\=\s*(?:\1|\[\[\1\]\])(?=[\s\.]*[|}])/g, '|work=$1$2');
regex(/\|\s?work\s*\=\s*(\[\[(?:[^<\|\]]*)\]\]|[^<\|\]]*)(\s?\|[^}<>]*|)\|\s?(?:publisher|work)\s*\=\s*\1(?=[\s\.]*[|}])/g, '|work=$1$2');
regex(/\|\s?publisher\s*\=\s*([^\[\]|}<>]*)(\s?\|[^}<>]*|)\|\s?(?:publisher|work)\s*\=\s*(?:\1|\[\[\1\]\])(?=[\s\.]*[|}])/g, '|work=$1$2');
regex(/\|\s?publisher\s*\=\s*(\[\[(?:[^|\]]*)\]\]|[^<\|\]\}]*)(\s?\|[^}<>]*|)\|\s?(?:publisher|work)\s*\=\s*\1(?=[\s\.]*[|}])/g, '|work=$1$2');
regex(/\|\s?location\s*\=\s*New York(?: City|)\s*(\|[^}<>]*|)\|\s?location\s*\=\s*(New York(?: City|)|USA)(?=[\s\.]*[|}])/g, '|location=New York $1');
regex(/(?:\|[ ]*?location=[^\|]*)(\|[ ]*?location=[^|}]*)(?=[|}\n])+/gi, '$1');
regex(/(\|[ ]*?publisher=[^=}]*)(?:\|[ ]*?publisher=[^=}]*)(?=[|}\n])+/gi, '$1');
regex(/(\|[ ]*?work=[^=}]*)(?:\|[ ]*?work=[^=}]*)(?=[|}\n])+/gi, '$1');
//'work' and its alias (pre)
regex(/(\|[ ]*?newspaper[ ]*=[^\|}]*(?:\|[^\{\}]*|))(?:\|[ ]*?work[ ]*=[^|}]*)(?=[|}\n])+/gi, '$1');
//unwinding of unnecessary pipes
regex(/\[\[([^\]\|]*)\|\1(?=\]\])/gi, '[[$1');
// removing artefacts (within citation templates)
regex(/(\|[ ]*?author=)(?:(?:Mon|Tues|Wednes|Thurs|Fri|Satur|Sun)day,? ?)(?=[^\]\|\}]*\|)/gi, '$1');
regex(/(?:| (?:Daily|English|(?:Mail |)Online|Music|News|Indian Express))(?=\s*\|)+/gi, '');
regex(/(?:(?:[ ]+|[ ]+HighBeam Research[ ]+[-–][ ]+FREE trial[ ]+|)\|publisher=Highbeam.com)/gi, '');
regex(/( | [\w, ]*?)(?=[ ]|)/gi, '');
regex(/( | Comment is free)/gi, '');
regex(/\|\s?title\s*\=\s*BBC (?:News|Sport)\s?(?:–|| )\s?/gi, '|title=');
regex(/(?:Football|international|Latest|local|UK|world|) News |(?=[ ]?[&\|])/gi, '');
regex(/(?:[-–|]||)[ ]*(?:Football|international|Latest|local|UK|world|) News(?=[ ]?[&\|])/gi, '');
regex(/<!-- Bot generated title -->/gi, '');
regex(/(DOC|PDF)\) \./gi, '$1).');
// removing artefacts (outside of citation templates)
regex(/([\w]+\'\')\.(?:co(?:m|m?\.\w{2})|\.\w{2})[ ]/gi, '$1. ');
// removing other artefacts
regex(/(UEFA\]\])\.(?:co(?:m|m?\.\w{2})|\.\w{2})(?= ?[\|{}])/gi, '$1');
//dynamic columns for reflists; remove scroll bar
regex(/((?:[Rr]eferences|[Nn]otes)[ ]?={2,4}[\n\r])[\r\n\s]*<div (?:style|class)=[^>]*>([\S\s]*)<\/div>/g, '$1$2');
regex(/(?:\{\{[Rr]eflist\}\}|<[Rr]eferences ?\/>)/g, '{{reflist|colwidth=30em}}');
}
/**
 * Strips wikilinks from source names: first inside citation-template
 * parameters, then inside <ref> tags.
 *
 * Each rule is passed to regex(pattern, replacement) in the order listed.
 * regex() is defined outside this block; it presumably applies the
 * replacement to the text of the edit box -- verify against its definition.
 */
function Ohc_sourceunlink() {
    // Not used below; the lookup is kept so that a missing edit form still fails up front.
    var txt = document.editform.wpTextbox1;

    var unlinkRules = [
        // --- citation template parameters ---
        // |work=[[Name]] (optionally followed by ", [[Second name]]") -> plain text
        [/(\|[ ]*?(?:author|agency|publisher|journal|newspaper|periodical|work)=[ ]*?)\[\[([^\|\]]+?)\]\](?:(,? )\[\[([^\|\]]+?)\]\]|)([ ]{0,1})/gi, '$1$2$3$4$5'],
        // |work=[[Target|Label]] -> keeps only the label
        [/(\|[ ]*?(?:author|agency|publisher|journal|newspaper|periodical|work)=[ ]*?)\[\[(?:[^\|\]]+?\|)([\w\s\,]+?)\]\](?:(,? )\[\[([^\|\]]+?)\]\]|)([ ]{0,1})/gi, '$1$2$3$4$5'],

        // --- free text inside <ref>...</ref> ---
        // NOTE(review): both patterns below require the literal text ") ." right
        // after </ref>, so they only fire in that rare situation. This looks
        // accidental -- confirm the intended pattern before relying on them.
        [/(<ref[^>]*>[^<]+?[\]\.,;]\s+)\[\[(?:[^\|\]<]*\||)([^\|\]<]*)(?: online|)\]\]([^<]*?<\/ref>)\) \./gi, '$1$2$3).'],
        // same as above, for a link wrapped in italics markup ('')
        [/(<ref[^>]*>[^<]+?[\]\.,;]\s+\'\')\[\[(?:[^\|\]<]*\||)([^\|\]<]*)(?: online|)\]\](\'\'[^<]*?<\/ref>)\) \./gi, '$1$2$3).']
    ];

    for (var i = 0; i < unlinkRules.length; i++) {
        regex(unlinkRules[i][0], unlinkRules[i][1]);
    }
}
/** ------------------------------------------------------------------------ **/
/// PROTECTION BY STRING SUBSTITUTION
// Store for protected text: a placeholder "⍌n⍍" left in the page stands for linkmap[n].
var linkmap = [];

/**
 * Hides text from the clean-up rules that run afterwards.
 *
 * Two kinds of text are swapped for numbered placeholders:
 *   1. everything between <ref ...> and </ref>;
 *   2. the remainder of a bulleted external link ("* [http:..." up to "]").
 * The hidden text is appended to linkmap; the matching unprotect function
 * puts it back.
 */
function ohc_protect_linkspam()
{
    // Replacer: keeps the opening and closing delimiters, stashes the middle part.
    function stash(whole, opening, hidden, closing) {
        var slot = linkmap.push(hidden) - 1; // push() returns the new length
        return opening + "⍌" + slot + "⍍" + closing;
    }

    // "." does not match line breaks, so only ref content without a newline is caught here.
    regex(/(<ref[^>]*?>)(.*?)(<\/ref>)/gi, stash);
    regex(/(\*[ ]?\[(?:https?:|ftp:))([^\]]*)(\])/gi, stash);
}
/**
 * Puts back the text that ohc_protect_linkspam() replaced with "⍌n⍍"
 * placeholders.
 *
 * Stored text may itself contain placeholders. They are expanded as well,
 * down to four levels in total; text taken from the fourth level is inserted
 * as stored, without further expansion.
 */
function ohc_unprotect_linkspam()
{
    // Returns linkmap[index] with its own placeholders expanded `levelsLeft` more levels.
    function restore(index, levelsLeft) {
        var stored = linkmap[index];
        if (levelsLeft === 0) {
            return stored;
        }
        return stored.replace(/⍌([0-9]+)⍍/g, function (marker, innerIndex) {
            return restore(innerIndex, levelsLeft - 1);
        });
    }

    regex(/⍌([0-9]+)⍍/g, function (marker, index) {
        // this level plus three nested ones = four levels
        return restore(index, 3);
    });
}
// Store for protected text: a placeholder "⍌n⍍" left in the page stands for linkmap[n].
// (Re-declares the global of the same name used by the linkspam protection;
// both share one array.)
var linkmap=[];

/**
 * Hides text that the source clean-up rules must not touch.
 *
 * Three kinds of text are swapped for numbered placeholders, in this order:
 *   1. the part of a URL after "http:", "https:" or "ftp:" when the URL
 *      follows "[" or "=", up to the next "]", "|" or "}";
 *   2. the arguments of {{harv...}}, {{sfn...}} and {{cite book}} templates,
 *      up to the first "}";
 *   3. the value of a "contribution=" parameter.
 * The hidden text is appended to linkmap; ohc_unprotect_urls() puts it back.
 */
function ohc_protect_urls()
{
    // Replacer: keeps the opening and closing delimiters, stashes the middle part.
    var protect_function = function(s, begin, replace, end) {
        linkmap.push(replace);
        return begin + "⍌"+(linkmap.length-1)+"⍍" + end;
    };

    // protect the rest (after purging urls inserted in ('website' or )'work' parameters)
    regex(/((?:[\[=]\s*)(?:https?:|ftp:))([^\]\|\}]*)(\s*[\]|}])/gi, protect_function);
    regex(/(\{\{(?:harv\w*|sfn\w*|cite ?book)\s?\|)([^\}]+)(\})/gi, protect_function);
    // Terminator is "|" or "}". The earlier form (\|\}) demanded the literal
    // two-character sequence "|}", so contribution values were practically
    // never protected.
    regex(/(\|\s*contribution\s*=)([^|}]+)([|}])/gi, protect_function);
}
/**
 * Puts back the text that ohc_protect_urls() replaced with "⍌n⍍" placeholders.
 *
 * Stored text may contain placeholders of its own (for example a protected
 * template whose arguments include an already protected URL). Those are
 * expanded too, four levels in total; the innermost level returns the stored
 * text untouched.
 */
function ohc_unprotect_urls()
{
    // Wraps a replacer in one that looks up the stored text and expands the
    // placeholders found in it with the wrapped replacer.
    function oneLevelAbove(inner) {
        return function (marker, key) {
            return linkmap[key].replace(/⍌([0-9]+)⍍/g, inner);
        };
    }

    // Innermost replacer: plain lookup, no further expansion.
    var expand = function (marker, key) {
        return linkmap[key];
    };
    // Three expanding levels stacked on top of the plain lookup.
    for (var level = 0; level < 3; level++) {
        expand = oneLevelAbove(expand);
    }

    regex(/⍌([0-9]+)⍍/g, expand);
}
/** ------------------------------------------------------------------------ **/
/**
 * Finishes a source clean-up run: sets the edit options, appends the
 * standard explanation to the edit summary and triggers the 'diff' action.
 *
 * setoptions(), setreason() and doaction() are defined outside this block.
 * NOTE(review): 'true' presumably ticks the "minor edit" box -- confirm
 * against setoptions().
 */
function Ohc_Source_edit_summary(){
    // The value is passed directly. The previous form, setoptions(minor='true'),
    // was an assignment to an undeclared variable: it leaked a global named
    // "minor" and throws a ReferenceError under strict mode. The argument
    // received by setoptions() is unchanged.
    setoptions('true');
    //Add a tag to the summary box
    setreason('per [[Help:Citation Style 1|CS1]], [[Template:Citation]] and [[MOS:ITALICS]] by [[User:Ohconfucius/script|script]]', 'append');
    doaction('diff');
}
/**
 * Entry point for the "Linkspam" toolbox link.
 *
 * Protects reference content and bulleted external links, runs
 * Ohc_linkspam() on what is left, restores the protected text and appends a
 * note to the edit summary.
 */
function Ohc_linkspam_driver() {
    ohc_protect_linkspam();
    try {
        Ohc_linkspam();
    } finally {
        // Always restore: if the clean-up step throws, the edit box must not be
        // left full of "⍌n⍍" placeholders that could be saved by accident.
        ohc_unprotect_linkspam();
    }
    // Reached only when the clean-up step completed without error.
    setreason('rem [[WP:Linkspam|linkspam]]', 'append');
}
/**
 * Entry point for the "Fix SOURCES" toolbox link: runs the whole source
 * clean-up sequence.
 *
 * Order matters: Ohc_remove_urls() runs before the URLs are protected, every
 * other stage runs while URLs (and the other text hidden by
 * ohc_protect_urls()) are replaced by placeholders, and the edit summary is
 * written last.
 */
function Ohc_ref_format_new() {
    Ohc_remove_urls();
    ohc_protect_urls();
    try {
        Ohc_sources_prep();
        ohc_foreign_dates();
        Ohc_unpipe();
        Ohc_dab_news_sources();
        Ohc_sourcename();
        Ohc_sourcework();
        Ohc_sourcepub();
        Ohc_sourceagency();
        Ohc_redo_pipe();
        Ohc_publishers();
        Ohc_sources_cleanup();
    } finally {
        // Always restore: if any stage throws, the edit box must not be left
        // full of "⍌n⍍" placeholders that could be saved by accident.
        ohc_unprotect_urls();
    }
    // Reached only when every stage completed without error.
    Ohc_Source_edit_summary();
}
/**
 * Removes citations that point at Wikipedia itself or at Facebook, MySpace
 * or Twitter, and turns inline external links to English Wikipedia into
 * wikilinks.
 *
 * Each rule is passed to regex(pattern, replacement) in the order listed.
 * regex() is defined outside this block.
 */
function Ohc_noverify() {
    // Not used below; the lookup is kept so that a missing edit form still fails up front.
    var txt = document.editform.wpTextbox1;

    var rules = [
        // whole <ref> whose template has such a |url=
        [/<ref[^<>]*>[^<>]*\|[ ]*url ?=https?:\/\/(?:\w{2}\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace|twitter)\.com)\/[^<>]*<\/ref>/gi, ''],
        // whole <ref> that holds nothing but such a bare URL
        [/<ref>https?:\/\/(?:en\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace|twitter)\.com)\/[^\s\]<]*<\/ref>/gi, ''],
        // whole <ref> that holds nothing but such a bracketed, labelled link
        [/<ref>\[https?:\/\/(?:en\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace|twitter)\.com)\/[^\s\]]*[ ]+[\w\d][^\]]*\]<\/ref>/gi, ''],
        // a remaining |url= parameter of that kind (the rest of the template is kept)
        [/\|[ ]*url[ ]*=[ ]*https?:\/\/(?:\w{2}\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace|twitter)\.com)[^\s\|\{\}<]*(?=[ ]*[|}])/gi, ''],
        // " [https://en.wikipedia.org/wiki/Page Label]" -> " [[Label]]" (the label, not the page name, becomes the link)
        [/[ ]\[https?:\/\/en\.wikipedia\.org\/wiki\/[^\s\]]*[ ]+([\w][^\]]*)\]/gi, ' [[$1]]']
    ];

    for (var i = 0; i < rules.length; i++) {
        regex(rules[i][0], rules[i][1]);
    }
}
// Registers the script's toolbox links once the page has loaded.
addOnloadHook(function () {
    // The links only make sense while a page is being edited.
    if (!document.forms.editform) {
        return;
    }

    // One row per link: [href, link text, element id, hover text]
    // template: ['javascript:function_name()', 'Button name', 't-dmy', 'Hover text']
    // NOTE(review): two rows share the id 't-citefix', and the last row uses
    // 'sort' for both id and hover text -- looks like copy-paste; confirm
    // before changing, since the ids end up in the page's DOM.
    var toolboxLinks = [
        ['javascript:Ohc_linkspam_driver()', 'Linkspam', 'rm-linkspam', 'removes linkspam in running text'],
        ['javascript:Ohc_sourceunlink()', 'Unlink source name', 't-citefix', 'Unlinks source'],
        ['javascript:Ohc_ref_format_new()', 'Fix SOURCES', 't-citefix', 'Run entire new module'],
        ['javascript:Ohc_noverify()', 'Unverifiable', 'sort', 'sort']
        // Individual stages of the "Fix SOURCES" run, disabled; append a row to expose one:
        // ['javascript:Ohc_remove_urls()', 'Remove certain urls', 't-citefix', '0.Improper urls']
        // ['javascript:Ohc_sources_prep()', 'Prepare sources', 't-citefix', '1.Prepares sources']
        // ['javascript:Ohc_unpipe()', 'Unpipe sources', 't-citefix', '2.Unpipe linked sources']
        // ['javascript:Ohc_dab_news_sources()', 'Applies DAB', 't-citefix', '3.Applies disambiguation']
        // ['javascript:Ohc_sourcename()', 'Align source name', 't-citefix', '4.Corrects source name – subscript1']
        // ['javascript:Ohc_sourcework()', 'Cleanup WORK', 't-citefix', '5.Cleanup and reclassifies as WORK – subscript2']
        // ['javascript:Ohc_sourcepub()', 'Cleanup publisher', 't-citefix', '6.Cleanup and reclassifies as PUBLISHER – subscript2']
        // ['javascript:Ohc_sourceagency()', 'Cleanup agency', 't-citefix', '7.Cleanup and reclassifies as AGENCY – subscript2']
        // ['javascript:Ohc_redo_pipe()', 'Repipe sources', 't-citefix', '8.Repipe ambiguous links']
        // ['javascript:Ohc_publishers()', 'Rem publishers', 't-citefix', '9.Removing certain publishers fields']
        // ['javascript:Ohc_sources_cleanup()', 'Final cleanup', 't-citefix', '10.Cleanup after script actions']
    ];

    for (var i = 0; i < toolboxLinks.length; i++) {
        var entry = toolboxLinks[i];
        // 'p-tb' is the portlet all links are added to; the last two arguments are left empty.
        addPortletLink('p-tb', entry[0], entry[1], entry[2], entry[3], '', '');
    }
});