Jump to content

User:Ohconfucius/test/Sources.js: Difference between revisions

From Wikipedia, the free encyclopedia
Content deleted Content added
further tweak to removing http links within publisher/journal/work fields
tweaks to increase lookaheads use
Line 110: Line 110:
// rem misplaced punctuation
// rem misplaced punctuation
regex(/(<ref[^>]*>[^<]+?[\]\.\},;–]\s*\'\'[\w-]*(?: [\w-]*){0,3})(\.com|)([;,\.])(\'\')([^<]*?<\/ref>)/gi, '$1$2$4$3$5');
regex(/(<ref[^>]*>[^<]+?[\]\.\},;–]\s*\'\'[\w-]*(?: [\w-]*){0,3})(\.com|)([;,\.])(\'\')(?=[^<]*?<\/ref>)/gi, '$1$2$4$3');
regex(/([\w]+)\.(['"]\])[ ]/gi, '$1$2. '); //LQ for titles
regex(/([\w]+)\.(['"]\])[ ]/gi, '$1$2. '); //LQ for titles


Line 117: Line 117:


//rem underlining within certain fields
//rem underlining within certain fields
txt.value=txt.value.replace(/(\|\s?(?:journal|newspaper|periodical|publisher|work)\s*\=\s*)<u>([^|}]*)<\/u>/gi, '$1');
txt.value=txt.value.replace(/(\|\s?(?:journal|newspaper|periodical|publisher|work)\s*\=\s*)<u>([^|}]*)<\/u>/gi, '$1$2');


//rem redundant top-level domains (.com, .net, .org), strip "www"
//rem redundant top-level domains (.com, .net, .org), strip "www"
Line 161: Line 161:
// regex(/(\[\[)(?:foo|bar)(\|)/gi, '$1foo bar \(dab\)$2');
// regex(/(\[\[)(?:foo|bar)(\|)/gi, '$1foo bar \(dab\)$2');


regex(/(\[\[)(?:(?:British|English|London) Sun|Sun on Sunday|The Scottish Sun|(?:The |)Sun (?:\((?:British |)newspaper\)|\(tabloid\)|\(UK newspaper\)|\(UK\)|Newspaper|on Sunday|Online)|Thesun\.co\.uk)(\|)/gi, '$1The Sun (United Kingdom)$2');
regex(/(\[\[)(?:(?:British|English|London) Sun|Sun on Sunday|The Scottish Sun|(?:The |)Sun (?:\((?:British |)newspaper\)|\(tabloid\)|\(UK newspaper\)|\(UK\)|Newspaper|on Sunday|Online)|Thesun\.co\.uk)(?=\|)/gi, '$1The Sun (United Kingdom)');
regex(/(\[\[)Daily Star \((?:British|UK)\)(\|)/gi, '$1Daily Star (United Kingdom)$2');
regex(/(\[\[)Daily Star \((?:British|UK)\)(?=\|)/gi, '$1Daily Star (United Kingdom)');
regex(/(\[\[Metro)(?: \(Associated Metro Limited\)| \(Associated Newspapers\)| \(London newspaper\)| \(free London newspaper\)| UK| newspaper London| newspaper UK)(\|)/gi, '$1 (British newspaper)$2');
regex(/(\[\[Metro)(?: \(Associated Metro Limited\)| \(Associated Newspapers\)| \(London newspaper\)| \(free London newspaper\)| UK| newspaper London| newspaper UK)(?=\|)/gi, '$1 (British newspaper)');
regex(/(\[\[)(?:Calcutta Telegraph|The Telegraph \((?:kolkatt?a|India)\)|(?:The |)Telegraph India|Telegraphindia\.com)(\|)/gi, '$1The Telegraph (Calcutta)$2');
regex(/(\[\[)(?:Calcutta Telegraph|The Telegraph \((?:kolkatt?a|India)\)|(?:The |)Telegraph India|Telegraphindia\.com)(?=\|)/gi, '$1The Telegraph (Calcutta)');
regex(/(\[\[)Dawn(?:, Karachi| newspaper|\.com| \((?:Newspaper|Pakistan)\))(\|)/gi, '$1Dawn (newspaper)$2');
regex(/(\[\[)Dawn(?:, Karachi| newspaper|\.com| \((?:Newspaper|Pakistan)\))(?=\|)/gi, '$1Dawn (newspaper)');
regex(/(\[\[The Pioneer)(?:, Karachi| newspaper| \((?:indian newspaper)\))(\|)/gi, '$1 (Indian newspaper)$2');
regex(/(\[\[The Pioneer)(?:, Karachi| newspaper| \((?:indian newspaper)\))(?=\|)/gi, '$1 (Indian newspaper)');
regex(/(\[\[)dailypioneer.com(\|)/gi, '$1The Pioneer (Indian newspaper)$2');
regex(/(\[\[)dailypioneer.com(?=\|)/gi, '$1The Pioneer (Indian newspaper)');
regex(/(\|)(Sport \()(newspaper\))(\]\])/g, '$1$2Spanish $3'); //dab moved December 2012
regex(/(\|)(Sport \()(newspaper\))(?=\||\]\])/g, '$1$2Spanish $3'); //dab moved December 2012
regex(/(=[ ]*\[\[)(?:[BE]SPN ?(?:USA|HD|Network|the ocho|\(United States\))|E.S.P.N.|(?:The |)Entertainment (?:and |)Sports Programming Network)(?:\|[\w, ]*)(\]\])/gi, '$1ESPN$2');
regex(/(=[ ]*\[\[)(?:[BE]SPN ?(?:USA|HD|Network|the ocho|\(United States\))|E.S.P.N.|(?:The |)Entertainment (?:and |)Sports Programming Network)(?:\|[\w, ]*)(?=\]\])/gi, '$1ESPN');


regex(/(?:agency|journal|newspaper|periodical|publisher|work)(\s?=\s?\[\[)(?:MTV (?:[A-Z]\w*|\([^\)\]]*\)))\|[^\)\]]*(\]\])/gi, 'publisher$1MTV$2');
regex(/(?:agency|journal|newspaper|periodical|publisher|work)(\s?=\s?\[\[)(?:MTV (?:[A-Z]\w*|\([^\)\]]*\)))\|[^\)\]]*(?=\]\])/gi, 'publisher$1MTV');


//unwinding of unnecessary pipes
//unwinding of unnecessary pipes
regex(/(\[\[)Public Broadcasting Service\|(PBS\]\])/gi, '$1$2');
regex(/\[\[Public Broadcasting Service\|(PBS\]\])/gi, '[[$1');


}
}
Line 281: Line 281:
regex(/(\|)(The Sun)(?: \((?:Hong Kong|Malaysia|Nigeria|United Kingdom)\))(\]\])/g, '$1$2$3');
regex(/(\|)(The Sun)(?: \((?:Hong Kong|Malaysia|Nigeria|United Kingdom)\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*The Telegraph) \(Calcutta\)(?=\s*[|}])/g, '$1|location=Kolkota');
regex(/(=[ ]*The Telegraph) \(Calcutta\)(?=\s*[|}])/g, '$1|location=Kolkota');
regex(/(''The Telegraph) \((Calcutta)\)('')/g, '$1$3 (Calcutta)');
regex(/(''The Telegraph) \((Calcutta)\)('')/g, '$1$3 ($2)');
regex(/(\|)(The Telegraph)(?: \(Calcutta\))(\]\])/g, '$1$2$3');
regex(/(\|)(The Telegraph)(?: \(Calcutta\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*The Daily Telegraph) \(Australia\)(?=\s*[|}])/g, '$1|location=Australia');
regex(/(=[ ]*The Daily Telegraph) \(Australia\)(?=\s*[|}])/g, '$1|location=Australia');

Revision as of 15:55, 12 April 2014

// *********************************************************************************************

//This is a test (non-production) script, and may have untested errors. Please exercise due care should you decide to use it.

// *********************************************************************************************

/*************
*** Regex menu framework
*** by [[m:user:Pathoschild]] <http://meta.wikimedia.org/wiki/User:Pathoschild/Scripts/Regex_menu_framework>
***	- adds a sidebar menu of user-defined scripts.
*************/
importScriptURI('//meta.wikimedia.org/w/index.php?title=User:Pathoschild/Scripts/Regex_menu_framework.js&action=raw&ctype=text/javascript');
importScript("User:Ohconfucius/script/MOSNUM_utils.js"); //needed for "'Accessed' -> 'Retrieved'"

importScript("User:Ohconfucius/test/Sources_subscript1.js"); //convert domain names into article names
importScript("User:Ohconfucius/test/Sources_subscript2.js"); //Correctly casing titles and apply or rem italicisation
importScript("User:Ohconfucius/test/Sources_subscript3.js"); //link-fixing, dabbing etc
importScript("User:Ohconfucius/script/foreigndates.js"); //link-fixing, dabbing etc
 
/**
 * Unwraps labelled external links in the edit box, keeping only the label.
 *
 * "[http://example.com Some label]" becomes "Some label". A link is left
 * untouched when the character directly before the opening bracket is ">"
 * or "*", or when the closing bracket is followed (after optional spaces)
 * by a newline or a hyphen.
 *
 * Reads and writes document.editform.wpTextbox1.value; returns nothing.
 */
function Ohc_linkspam() {
	var editBox = document.editform.wpTextbox1;

	// $1 = the character (plus optional space) preceding the link, $2 = the link label
	var labelledExternalLink = /([^>\*][ ]?)\[https?:\/\/[^\s\[\]]*[ ]([\'\w\d][^\[\]]*)\](?![ ]*[\n\-]+)/gi;
	var unwrappedText = editBox.value.replace(labelledExternalLink, '$1$2');

	editBox.value = unwrappedText;
}

/**
 * Removes URLs that have been placed where a name belongs, and replaces
 * citations that point at Wikipedia or social-network pages.
 *
 * Works on document.editform.wpTextbox1.value, partly through direct string
 * replacement and partly through the external regex() helper (loaded from the
 * Regex menu framework). The rules run strictly in the order written.
 */
function Ohc_remove_urls() {
	var editBox = document.editform.wpTextbox1;
	var citationNeeded = '{{cn}}';

	// removing http links within publisher/journal/work fields
	regex(/((?:author|publisher|work) *= *)(?:https?:|ftp:)\/{2}(?:\w{2,4}\.|)(\w+)\.(?:com?|net|org|gov)(?:\.\w{2}|)(?:\/[^|}]*|)(?=\s*[\]|}])/gi, '$1$2');

	// leave only domain name
	var wwwPrefixInField = /(\|\s?(?:author(?:link\d?|)|journal|newspaper|publisher|website|work)\s*\=\s*)(?:https?:\/\/|)www\.(\w)/gi;
	// [http://… label] inside a periodical/publisher field -> label
	var bracketedLinkInWorkField = /(\|\s?(?:newspaper|work|journal|publisher)\s*\=\s*)\[https?:\/\/[^\s\]]*\s([\w][^\]]*)\]/gi;
	// [http://… label] inside an author field -> label
	var bracketedLinkInAuthorField = /(\|\s?author(?:link\d?|)\s*\=\s*)\[https?:\/\/[^\s\]]*\s([\w][^\]]*)\]/gi;
	// rem outright (not a WL)
	var wwwValueInAuthorField = /(\|\s?author(?:link\d?|)\s*\=\s*)(?:https?:\/\/|)www\.[\w][^|}]*(?=[|}\n])/gi;

	editBox.value = editBox.value
		.replace(wwwPrefixInField, '$1$2')
		.replace(bracketedLinkInWorkField, '$1$2')
		.replace(bracketedLinkInAuthorField, '$1$2')
		.replace(wwwValueInAuthorField, '$1');

	// removing references to other WP articles and 'external' WP links
	regex(/<ref[^<>]*>[^<>]*\|[ ]*url ?=https?:\/\/(?:\w{2}\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace|twitter)\.com)\/[^<>]*<\/ref>/gi, citationNeeded);
	regex(/<ref>https?:\/\/(?:\w{2}\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace)\.com)\/[^\s\]<]*<\/ref>/gi, citationNeeded);
	regex(/<ref>\[https?:\/\/(?:\w{2}\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace)\.com)\/[^\s\]]*[ ]+[\w\d][^\]]*\]<\/ref>/gi, citationNeeded);
	// drop the |url= parameter itself when it points at one of those sites
	regex(/\|[ ]*url[ ]*=[ ]*https?:\/\/(?:\w{2}\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace)\.com)[^\s\|\{\}<]*(?=[ ]*[|}])/gi, '');

	// external-style link to a Wikipedia page -> wikilink built from its label
	regex(/[ ]\[https?:\/\/\w{2}\.wikipedia\.org\/wiki\/[^\s\]]*[ ]+([\w][^\]]*)\]/gi, ' [[$1]]');
	// rem link tracking
	regex(/(\|\s?url\s*\=\s*|\[)(https?:[^|{}#\s]+)#[A-Za-z0-9\.]{12,13}(?=[\s\[\]|{}<>])/gi, '$1$2');
}

/**
 * First-pass tidy of citation templates in the edit box.
 *
 * Strips artefacts from parameter values (bylines, "vol.", "p.", quote marks,
 * underlining, corporate suffixes, wrapper templates), drops blank parameters,
 * unlinks location values and normalises "Accessed"/"Retrieved" wording.
 *
 * Works on document.editform.wpTextbox1.value, partly through direct string
 * replacement and partly through the external helpers regex() and ohc_regex()
 * (neither is defined in this file). The rules run strictly in the order written.
 */
function Ohc_sources_prep() {
	var txt=document.editform.wpTextbox1;

// removing artefacts within fields
	regex(/(\|\s?author\s*\=\s*)(?:by |)(?:wire staff|(?:staff |)reporters?|)[ ]*(?=[|}\n])/gi, '');
	regex(/(\|\s?author\s*\=\s*)([A-Z][a-z]*(?: [A-Z][a-z]*)*) (?:wire staff|(?:staff |)reporters?)[ ]*(?=[|}\n])/gi, '$1$2');
	// FIX: a stray '=' after the lookahead made this rule impossible to match
	regex(/\|[ ]*last=(Reporter|staff)[ ]*\|[ ]*first=[^|\{\}]*(?=[\|{}])/gi, '');
	regex(/(\|\s?accessdate\s*\=\s*)(?:accessed|retrieved)(?: by| on|):?[ ]*(\d)/gi, '$1$2');
	regex(/(\|\s?volume\s*\=\s*)vol(?:ume|\.?)[ ]*(\d)/gi, '$1$2');
	regex(/(\|\s?pages?\s*\=\s*)(?:pages?|p[gp]?\.?)[ ]*(\d)/gi, '$1$2');

	//Remove COinS corrupting templates from CS1 citations
	// FIX: keep the wrapped text ($2); previously only the template was meant to go but the name went with it
	regex(/(\|\s?(?:authors?|first\d?|last\d?|publisher|work)\s*\=\s*(?:[^{}|]*|)){{(?:Sm|Aut|SC|Small[- ]caps|Sm?caps)\|([^{}|]*)}}(?=(?:[^{}|]*|)[|}])/gi, '$1$2');

	// NOTE(review): the lookahead in the next two rules sits directly after the URL prefix, so they
	// only fire when nothing follows "http://" or "www." - confirm that is the intended scope.
	regex(/(\|\s?)\w+\=(url\s*\=\s*https?:\/\/)(?=[|}\n])/gi, '$1$2'); //common cs1 error
	// FIX: the inserted scheme was missing its colon ("http//")
	regex(/(\|\s?url\s*\=)(www\.)(?=[|}\n])/gi, '$1http://$2'); //common cs1 error
	regex(/(\|\s*date\s*=\s*)(?:not? |non-|un)date[ds]?\s*(?=[|}\n])/gi, '$1n.d.'); //common cs1 error

	// removing wikinews templates/links and selected external-link templates
	regex(/\{\{wikinews ?(|2|cat(?:egory)?|has|par2?|portal|table|-inline)(\|[^\}]+|)\}\}\s*/gi, '');
	regex(/(\*[ ]*|)\[\[n:[^\]]*\]\][^\r\n]*[\r\n]/gi, '');
	regex(/\*[ ]*\{\{(?:Facebook|Find a Grave|Myspace)\|([^}]*)\}\}[\n\r\s]*/gi, '');

// removing inappropriately populated fields
//	regex(/(\|\s?at\s*\=\s*(?:pages? |)(?:[-–\d\s,;]*) ?)[^|}]+(?=[|}\n])/gi, '');

//citation template fixes
	regex(/(\|\s?)published\s?=/gi, '$1publisher=');
	// rem copyright assertion
	regex(/(\|\s?publisher\s*\=\s*)(?:\[\[copyright(?:\|©|)\]\])\s?/gi, '$1');
	regex(/(\|\s?publisher\s*\=\s*)(?:©|copyright)\s?/gi, '$1');
	// misused 'date' parameter
	regex(/\|\s?date(\s?=\s?[12]\d{3}\s?[|}])/gi, '|year$1');

	// rem toggles and redundant quote marks
	txt.value=txt.value.replace(/(\|\s?(?:agency|author|newspaper|work|journal|publisher)\s*\=\s*)\'\'([^|}]+)\'\'(?=\s*[\}\|])/gi, '$1$2'); //without link
	txt.value=txt.value.replace(/(\|\s?(?:agency|author|newspaper|work|journal|publisher)\s*\=\s*)\'\'(\[\[(?:[^\|]+\||)[^\|\]]+\]\])\'\'(?=\s*[\}\|])/gi, '$1$2');  //with link
	txt.value=txt.value.replace(/(\|\s?title\s*\=\s*)\'&#39;([^\|\{\}]+)\'&#39;/gi, '$1$2'); //rem &#39; in titles
	txt.value=txt.value.replace(/(\|\s?publisher\s*\=\s*)\(([^\|\{\}]+)\)/gi, '$1$2'); //rem parenthetical publishers

// reordering 'work' and 'publisher' (first run - see second run in cleanup function)
	// NOTE(review): " journal" carries a leading space, so "|journal=" only matches as "| journal=" - confirm intent
	regex(/(\|\s?publisher\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(\s?\|[^}<>]*|)(\|\s?(?: journal|newspaper|magazine|periodical|website|work)\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(?=[\s\.]*[|}])/g, '$3$1$2');

	// remove redundant parentheses and templates from dm and md dates (equivalents also exists in Mosnum script)
	// NOTE(review): the character class has no quantifier, so only a single parenthesised character is unwrapped
	regex(/(=[ ]*)\(([^()|{}])\)/gi, '$1$2');
	regex(/(date[ ]*=[ ]*)\{\{(?:Start|End) ?date\|(\d{4})\|(0?\d|1[012])\|([0-2]?\d|30|31)\}\}/gi, '$1$2-$3-$4');  //stripping start/end template notes inside "|date=" parameter

	// rem corporate designation
	// FIX: the lookahead demanded the literal sequence "|}"; it now accepts either terminator, as the sibling rules do
	txt.value=txt.value.replace(/(\|\s?publisher\s*\=\s*[^\[|}]{1,40}), (?:Inc|LL[CP]|Ltd|PLC|SA)\.?(?=[ ]*[|}])/gi, '$1');
	txt.value=txt.value.replace(/(\|\s?publisher\s*\=\s*[^\[|}]{1,40}) (?:Inc|LL[CP]|Ltd|PLC|SA)\.?(?=[ ]*[|}])/gi, '$1');

	// rem unnecessary quote marks
	txt.value=txt.value.replace(/(\|\s?title\s*\=\s*)["“]([^\|]+)["”](?=\s?[|}])/gi, '$1$2');
	txt.value=txt.value.replace(/(\|\s?title\s*\=\s*)['‘]([^\|'’]+)['’](?=\s?[|}])/gi, '$1$2');
	// repl double 'in-title' quote marks with single quotes
	txt.value=txt.value.replace(/(\|\s?title\s*\=\s*[\w ]* )["“]((?:\w[\w]* )+(?:\w[\w]*))["”]([^\|]+|)(?=\s?[|}])/gi, '$1\'$2\'$3');

	// adjust for possibly incorrectly input title
	// NOTE(review): \1 back-references the "|title=" prefix, not the title text (group 2) - confirm which was meant
	regex(/(\|\s?title\s*\=\s*)([^\|\}<>]*)(\s?\|[^}<>]*|)\|\s?(publisher|work)\s*\=\s*(?:\1|\[\[\1\]\])(?=\s*[|}])/g, '$1ACTUAL ARTICLE TITLE BELONGS HERE! |$4=$2$3');
	regex(/(\|\s?title\s*\=\s*)(\w+\.com)(?=\s?[=|{}])/gi, '$1ACTUAL ARTICLE TITLE BELONGS HERE! |publisher=$2');

	// rem misplaced punctuation
	regex(/(<ref[^>]*>[^<]+?[\]\.\},;–]\s*\'\'[\w-]*(?: [\w-]*){0,3})(\.com|)([;,\.])(\'\')(?=[^<]*?<\/ref>)/gi, '$1$2$4$3');
	regex(/([\w]+)\.(['"]\])[ ]/gi, '$1$2. ');    //LQ for titles

	// removing blank parameters
	regex(/(?:\|[ ]*(?:accessdate|agency|archive(?:date|url)|arxiv|asin|at|author(-?link|-mask|-name-separator|-separator|\d|\d-link|link\d?|)|bibcode|chapter|chapter-url|coauthors?|contribution(?:-url|)|date|deadurl|display-authors|doi|doi-inactive|doibroken|edition|editor(?:-first|-last|-link|\d|\d-first|\d-last|\d-link|)|(?:first|last)\d?|format|id|is[bs]n|issue|jfm|journal|jstor|language|lay(?:date|source|summary)|lccn|location|magazine|day|month|mr|newspaper|nopp|oclc|ol|origyear|osti|others|pages?|periodical|place|pm[cd]|pmid|postscript|publication(?:-date|-place)|publisher|quote|ref|rfc|separator|series|ssrn|trans_title|type|url|volume|work|year|zbl)[ ]*=[\s]*)(?=[\}\|])/gi, '');

	//rem underlining within certain fields
	txt.value=txt.value.replace(/(\|\s?(?:journal|newspaper|periodical|publisher|work)\s*\=\s*)<u>([^|}]*)<\/u>/gi, '$1$2');

	//rem redundant top-level domains (.com, .net, .org), strip "www"
	txt.value=txt.value.replace(/(\|\s?(?:journal|newspaper|periodical|publisher|work)\s*\=\s*)(\[\[[^\[\]\}]*\]\])\.(?:biz|com|net|org|co\.uk)(?=\s*[|}])/gi, '$1$2');

	//rem duplicated publishers in separate fields (pre)
	regex(/[‒–—―]+\s*([^|}]{3,})\s*(\|\s?(?:publisher|work)\s*\=\s*(?:\w+\.|))\1(?=\s?[|}])/gi, '$2$1');
	//'work' and its alias (pre)
	regex(/(\|[ ]*?newspaper[ ]*=[^\|}]*(?:\|[^\{\}]*|))(?:\|[ ]*?work[ ]*=[^|}]*)(?=\s?[|}])+/gi, '$1');

	//rem linking within 'location' field
	// FIX: this literal was split across two lines (a syntax error); it must stay on a single line
	regex(/(\|[ ]*?(?:location|place)=[ ]*?)\[\[ ?(Abkhazia|Afghanistan|Albania|Algeria|Andorra|Angola|Antigua and Barbuda|Argentina|Armenia|Australia|Austria|Azerbaijan|(?:The |)Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belgium|Belize|Benin|Bhutan|Bolivia|Bosnia and Herzegovina|Botswana|Brazil|Brunei|Bulgaria|Burkina Faso|Burma|Burundi|Cambodia|Cameroon|Canada|Cape Verde|Central African Republic|Chad|Chile|(?:(?:People's |)Republic of |)China|Colombia|Comoros|(?:Democratic |)Republic of (?:the |)Congo|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Cyprus|Czech Republic|(?:Kingdom of |)Denmark|Djibouti|Dominica|Dominican Republic|East Timor|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Fiji|Finland|France|Gabon|Georgia \(country\)|Germany|Ghana|Greece|Greenland|Grenada|Guatemala|Guinea|Guinea-Bissau|Guyana|Haiti|Honduras|Hungary|Iceland|India|Indonesia|Iran|Iraq|(?:Republic of |)Ireland|Israel|Italy|Jamaica|Japan|Jordan|Kazakhstan|Kenya|Kiribati|North Korea|South Korea|Kosovo|Kuwait|Kyrgyzstan|Laos|Latvia|Lebanon|Lesotho|Liberia|Libya|Liechtenstein|Lithuania|Luxembourg|(?:Republic of |)Macedonia|Madagascar|Malawi|Malaysia|Maldives|Mali|Malta|Marshall Islands|Mauritania|Mauritius|Myanmar|M[ée]xico|(?:Federated States of |)Micronesia|Moldova|Monaco|Mongolia|Montenegro|Morocco|Mozambique|Nagorno-Karabakh|Namibia|Nauru|Nepal|(?:Kingdom of the |)Netherlands|Holland|New Zealand|Nicaragua|Niger|Nigeria|Northern Cyprus|Norway|Oman|Pakistan|Palau|Palestine|Panama|Papua New Guinea|Paraguay|Peru|Philippines|Poland|Portugal|Qatar|Romania|Russia|Rwanda|SADR|Saint Kitts and Nevis|Saint Lucia|Saint Vincent and the Grenadines|Samoa|San Marino|São Tomé and Príncipe|Saudi Arabia|Senegal|Serbia|Seychelles|Sierra Leone|Singapore|Slovakia|Slovenia|Solomon Islands|Somalia|Somaliland|South Africa|South Ossetia|Spain|Sri Lanka|Sudan|Suriname|Swaziland|Sweden|Switzerland|Syria|Taiwan|Tajikistan|Tanzania|Thailand|Timor Leste|(?:The |)Gambia|Togo|Tonga|Transnistria|Trinidad and Tobago|Tunisia|Turkey|Turkmenistan|Tuvalu|Uganda|Ukraine|United Arab Emirates|United Kingdom|United States|Uruguay|Uzbekistan|Vanuatu|Vatican City|Venezuela|Vietnam|Yemen|Zambia|Zimbabwe)[ ]?\|[ ]?(?:\w{2,3})\]\]/gi, '$1$2');
	regex(/(\|[ ]*?(?:location|place)=[ ]*?)\[\[(?:[^\|\]]+?\||)([\w\s\,]+?)\]\](?:(,? )\[\[(?:[^\|\]]+?\||)([\w\s\,]+?)\]\])(?:(,? )\[\[(?:[^\|\]]+?\||)([\w\s\,]+?)\]\]|)(?=[ ]?[|}])/gi, '$1$2$3$4$5$6');
	regex(/(\|[ ]*?(?:location|place)=[ ]*?)\[\[(?:[^\|\]]+?\||)([\w\s\,]+?)\]\](?:(,? )\[\[(?:[^\|\]]+?\||)([\w\s\,]+?)\]\]|)(?=[ ]?[|}])/gi, '$1$2$3$4');

// removing english icon template
	regex(/[ ]?\{\{en[- ]icon\}\}/gi, "");

// removing icon template from within "|language=" parameter
	regex(/(\|[ ]*?language[ ]*?=[ ]*?)\{\{(\w{2})(?:[- ]icon|)\}\}/gi, "$1$2");

// eliminating time of day
	regex(/(\|[ ]*author[ ]*=[ ]*)(?:posted|published)(?: by| on|)[\s:](?=\s*\w)/gi, "$1");
	regex(/(\|[ ]*(?:date|archivedate|accessdate|author)[ ]*=[ ]*)[0-2]?\d:[0-5]\d(?:[ ]|&nbsp;)(?:[ap]m ?|[ap]\.m\. |[A-Z]{1,2}T|UTC)[\.,]?[ ]?/gi, "$1");

	// eliminating days of the week
	regex(/(\|[ ]*(?:date|archivedate|accessdate|author)[ ]*=[ ]*)(?:(?:Mon|Tues|Wednes|Thurs|Fri|Satur|Sun)day,?)\s/gi, "$1");
	regex(/(\|[ ]*(?:date|archivedate|accessdate|author)[ ]*=[ ]*)(?:(?:Mon|Tues?|Wed|Thur?|Fri|Sat|Sun)[\.,]?)\s/gi, "$1");

	//'Accessed' -> 'Retrieved'
	// the @month/@dd/@yyyy tokens are presumably expanded by ohc_regex (MOSNUM_utils) - not visible in this file
	ohc_regex(/(?:[\.,;][ ]*(?:url |link |last |)(?:Retrieved|Accessed))(?: on(?:line|)|):? (@month|@dd|@yyyy)(?=\D)/gi, '. Retrieved $1');
	ohc_regex(/(\w|\])(?:[ ]*(?:url |link |last |)(?:Retrieved|Accessed))(?: on(?:line|)|):? (@month|@dd|@yyyy)(?=\D)/gi, '$1. Retrieved $2');

	ohc_regex(/(?:Retrieved|Accessed)(?: on(?:line|)|):? (@Month\s@DD,?\s@YYYY|@DD\s@Month\s@YYYY|@yyyy-@mm-@dd)(?=\D)/gi, 'Retrieved $1');
	ohc_regex(/(\w|\])[\.,;]?[ ]\((Retrieved (?:@Month\s@DD,\s@YYYY|@DD\s@Month\s@YYYY|@yyyy-@mm-@dd))\)/gi, '$1. $2');

}

/**
 * Retargets piped wikilinks to news sources at their disambiguated article titles.
 *
 * Most rules rewrite only the link target and use a lookahead on the following
 * "|" so the display text is left alone. Every rule goes through the external
 * regex() helper (not defined in this file), in the order written.
 */
function Ohc_dab_news_sources() {
	// NOTE(review): txt is not used below; kept so the function still touches the edit form exactly as before
	var txt=document.editform.wpTextbox1;

//pre-dab of piped sources
//         regex(/(\[\[)(?:foo|bar)(\|)/gi, '$1foo bar \(dab\)$2');

	regex(/(\[\[)(?:(?:British|English|London) Sun|Sun on Sunday|The Scottish Sun|(?:The |)Sun (?:\((?:British |)newspaper\)|\(tabloid\)|\(UK newspaper\)|\(UK\)|Newspaper|on Sunday|Online)|Thesun\.co\.uk)(?=\|)/gi, '$1The Sun (United Kingdom)');
	regex(/(\[\[)Daily Star \((?:British|UK)\)(?=\|)/gi, '$1Daily Star (United Kingdom)');
	regex(/(\[\[Metro)(?: \(Associated Metro Limited\)| \(Associated Newspapers\)| \(London newspaper\)| \(free London newspaper\)| UK| newspaper London| newspaper UK)(?=\|)/gi, '$1 (British newspaper)');
	regex(/(\[\[)(?:Calcutta Telegraph|The Telegraph \((?:kolkatt?a|India)\)|(?:The |)Telegraph India|Telegraphindia\.com)(?=\|)/gi, '$1The Telegraph (Calcutta)');
	regex(/(\[\[)Dawn(?:, Karachi| newspaper|\.com| \((?:Newspaper|Pakistan)\))(?=\|)/gi, '$1Dawn (newspaper)');
	regex(/(\[\[The Pioneer)(?:, Karachi| newspaper| \((?:indian newspaper)\))(?=\|)/gi, '$1 (Indian newspaper)');
	// FIX: the dot is escaped so only the literal domain matches, like the other domain rules above
	regex(/(\[\[)dailypioneer\.com(?=\|)/gi, '$1The Pioneer (Indian newspaper)');
	regex(/(\|)(Sport \()(newspaper\))(?=\||\]\])/g, '$1$2Spanish $3');  //dab moved December 2012
	// FIX: dots in "E.S.P.N." are escaped; unescaped they matched any four separator characters
	// This rule also discards the piped display text, leaving a plain [[ESPN]] link.
	regex(/(=[ ]*\[\[)(?:[BE]SPN ?(?:USA|HD|Network|the ocho|\(United States\))|E\.S\.P\.N\.|(?:The |)Entertainment (?:and |)Sports Programming Network)(?:\|[\w, ]*)(?=\]\])/gi, '$1ESPN');

	// piped MTV sub-brand links collapse to [[MTV]], and the parameter is renamed to publisher
	regex(/(?:agency|journal|newspaper|periodical|publisher|work)(\s?=\s?\[\[)(?:MTV (?:[A-Z]\w*|\([^\)\]]*\)))\|[^\)\]]*(?=\]\])/gi, 'publisher$1MTV');

//unwinding of unnecessary pipes
	regex(/\[\[Public Broadcasting Service\|(PBS\]\])/gi, '[[$1');

}

/**
 * Removes |publisher= (and some |work=/|newspaper=) parameters whose value is
 * a publishing house or parent company rather than the publication itself.
 *
 * The first group of rules handles plain-text values; the second group repeats
 * them for wikilinked values. Every rule goes through the external regex()
 * helper (not defined in this file), in the order written.
 */
function Ohc_publishers() {
	// NOTE(review): txt is not used below; kept so the function still touches the edit form exactly as before
	var txt=document.editform.wpTextbox1;

//linked publishing houses
// removing publishers less well-known than their titles
	// FIX: the replacement used to leave a stray "|=" behind; only the first publisher parameter is kept now
	regex(/(\|\s?publisher\s?\=MTV)\|\s?publisher\s*\=\s*(?:MTV Networks|Viacom)/gi, '$1');

	// FIX: the capture now includes "|publisher=", so the surviving value stays a parameter
	// instead of being fused onto the preceding field (matches the mirrored rule below)
	regex(/(?:\|publisher=Turner Sports Interactive, Inc)\.? ?(\|publisher=NBA)(?= ?\|)/gi, '$1');
	regex(/(\|publisher=NBA) ?\|publisher=(?:Turner Sports Interactive, Inc)\.?(?= ?\|)/gi, '$1');
	regex(/\[\[Jann? Wenner\|Wenner Media\]\](?= ?\|)/gi, '');
// removing publishers for periodicals
	regex(/\|publisher=\[\[(?:PMC \(company\)\||)(?:PMC|Penske Media Corporation)\]\](?=[\s\.]*[|}])/gi, '');
	regex(/(?:\|\s?(newspaper|work|publisher)\s*\=\s*\[?\[?(?:Hachette Filipacchi Médias\||)Hachette Filipacchi(?: \(UK\) Ltd.?| UK|)\]?\]?)(?=[\s\.]*[|}])/gi, '');
	regex(/(?:\|\s?(newspaper|work|publisher)\s*\=\s*(ACP Magazines|The Herald and Weekly Times|John Fairfax (and Sons Ltd\.?|Holdings)|Fairfax(?: Media(?: Limited|)| Digital| newspapers|)))(?=[\s\.]*[|}])/gi, '');
	regex(/(?:\|\s?(newspaper|work|publisher)\s*\=\s*\[\[(ACP Magazines|The Herald and Weekly Times|John Fairfax (and Sons Ltd\.?|Holdings)|Fairfax(?: Media(?: Limited|)| Digital| newspapers))\]\])(?=[\s\.]*[|}])/gi, '');
	// FIX: "Nielsen (?: Media Research|...)" required two spaces before "Media Research"
	regex(/(?:\|\s?publisher\s*\=\s*(Alexander Lebedev|American Media|Associated Newspapers|Cond[eé] Nast(?: Publications|)|Daily Mail and General Trust|Devin Laz[ae]rine|Dow Jones & Company|Future plc|(Guardian|Telegraph) Media Group|(?:Guardian|Independent) News (?:and|&) Media (?:Limited|Ltd\.|)|Hachette Filipacchi Médias|Hearst (?:Corporation|Magazines(?: UK|))|Herald Media|IGN Entertainment|Imdb Inc\.?|InterMedia Partners|IDG|IPC Media|Lee Enterprises|Media ?News Group|Mortimer Zuckerman|MTV Networks|News (?:Corporation|International|Limited)|Prometheus Global Media|Reed Business Information|Rovi Corporation|Trinity Mirror|Times Newspapers|Nielsen (?:Media Research|Business Media)|Viacom|Time(?: Warner ?|)))(,? Inc| LL[CP]| Ltd|Limited|)[\s\.]*(?=[|}\n])/gi, '');
	// the next three rules are case-sensitive (no "i" flag)
	regex(/\|\s?publisher\s*\=\s*(?:The |)(?:Deseret News Publishing|Dispatch Printing|E. W. Scripps|Evening Post Publishing|Forbes(?: Publishing|, Inc\.)|Gannett?|Irish Times Trust|(?:Jann Wenner|Wenner Media)|Johnson Publishing|Journal Communications|Mac Publishing|Media24|McClatchy|Nash holdings LLC|New York Times|Seattle Times|Star Tribune|Thomp?son(?:[- ]?Reuters)?(?: Corporation| Plc.?|)|Torstar|Time Inc\.|Times (?:Group|Publishing)|Tribune|Vox Media|Washington Post|World Publishing|Ziff Davis Media)(?: Co(?:mpany|\.)?)?(?=[\s\.]*[|}])/g, '');
	regex(/\|\s?publisher\s*\=\s*(?:Cox|Halifax|North Jersey|Sun-Times|Tampa|Herald|Stephens|WEHCO|\w+) Media( Group(?:, Inc.)?)?(?=[\s\.]*[|}])/g, '');
	regex(/\|\s?publisher\s*\=\s*(?:\w+ )+(?:Communications|Media|Publishing|Publications)(?: Group(?:, Inc.)?)?(?=[\s\.]*[|}])/g, '');
//duplicate above with links //("Corporation" excluded - false positive with Australian Broadcasting Corporation)
	// FIX: same "Nielsen" double-space correction as the unlinked rule above
	regex(/(?:\|\s?publisher\s*\=\s*\[\[(Alexander Lebedev|American Media|Associated Newspapers|Cond[eé] Nast(?: Publications|)|Daily Mail and General Trust|Devin Laz[ae]rine|Dow Jones & Company|Future plc|(Guardian|Telegraph) Media Group|(?:Guardian|Independent) News (?:and|&) Media (?:Limited|Ltd\.|)|Hachette Filipacchi Médias|Hearst (?:Corporation|Magazines(?: UK|))|Herald Media|IGN Entertainment|Imdb Inc\.?|InterMedia Partners|IDG|IPC Media|Lee Enterprises|Media ?News Group|Mortimer Zuckerman|MTV Networks|News (?:Corporation|International|Limited)|Prometheus Global Media|Reed Business Information|Rovi Corporation|Trinity Mirror|Times Newspapers|Nielsen (?:Media Research|Business Media)|Viacom|Time(?: Warner ?|)))(,? Inc| LL[CP]| Ltd|Limited|)(?:\|[^\]\}]*|)\]\][\s\.]*(?=[|}\n])/gi, '');
	regex(/\|\s?publisher\s*\=\s*\[\[(?:The |)(?:Deseret News Publishing|Dispatch Printing|E. W. Scripps|Evening Post Publishing|Forbes(?: Publishing|, Inc\.)|Gannett?|Irish Times Trust|(?:Jann Wenner|Wenner Media)|Johnson Publishing|Journal Communications|Mac Publishing|Media24|McClatchy|Nash holdings LLC|New York Times|Seattle Times|Star Tribune|Thomp?son(?:[- ]?Reuters)?(?: Corporation| Plc.?|)|Torstar|Time Inc\.|Times Publishing|Tribune|Vox Media|Washington Post|World Publishing|Ziff Davis Media)(?: Co(?:mpany|\.)?)?\]\](?=[\s\.]*[|}])/g, '');
	regex(/\|\s?publisher\s*\=\s*\[\[(?:Cox|Halifax|North Jersey|Sun-Times|Tampa|Herald|Stephens|WEHCO|\w+) Media( Group(?:, Inc.)?)?\]\](?=[\s\.]*[|}])/g, '');
	regex(/\|\s?publisher\s*\=\s*\[\[(?:\w+ )+(?:Communications|Media|Publishing|Publications)( Group(?:, Inc.)?)?\]\](?=[\s\.]*[|}])/g, '');

}

function Ohc_sources_cleanup() {
 var txt=document.editform.wpTextbox1;

// displacing location-dab (in parentheses)
         regex(/( \|location=(?:New York|UK))(\]\])/gi, '$2$1');

         // The following regexes for dab-links are in sets of four. If changing, please ensure all sets are changed
         regex(/(=[ ]*(?:The ?|)[A-Z]\w*(?: [A-Z]\w*|)) \((South Africa)(?:n newspaper|)\)([ ]*[|}])/g, '$1|location=$2$3');
         regex(/(''(?:The ?|)[A-Z]\w*(?: [A-Z]\w*|)) \((South Africa)(?:n newspaper|)\)(''[\.,;])/g, '$1$3$2');
         regex(/(\[\[((?:The ?|)[A-Z]\w*(?: [A-Z]\w*|)))( \(South Africa(?:n newspaper|)\))(\]\][\.,;]?)/g, '$1$3|$2$4');
         regex(/(\|)((?:The ?|)[A-Z]\w*(?: [A-Z]\w*|))(?: \((?:South Africa)(?:n newspaper|)\))(\]\])/g, '$1$2$3');

         regex(/(=[ ]*)(Billboard|Fast Company|Q|Time Out) \((?:magazine)\)(?=\s*[|}])/g, '$1$2'); //non-standard code
         regex(/('')(Billboard|Fast Company|Q|Time Out) \(magazine\)(?='')/g, '$1$2'); //non-standard code
         regex(/(\|)(Billboard|Fast Company|Q|Time Out)(?: \(magazine\))(?=\]\])/g, '$1$2'); 
         regex(/(=[ ]*Daily News) \((New York)\)([ ]*[|}])/g, '$1|location=$2$3');
         regex(/(''Daily News) \((New York)\)('')/g, '$1$3 ($2)');
         regex(/(\|)(Daily News)(?: \((New York)\))(\]\])/g, '$1$2$4|location=$3');
         regex(/(\[\[)(Daily News)( \((New York)\))(\]\])/g, '$1$2$3|$2$5|location=$4');
         regex(/(=[ ]*Daily Record|=[ ]*Sunday Mail) \((Scotland)\)(\s*[|}])/g, '$1|location=$2$3');
         regex(/(''Daily Record|''Sunday Mail) \((Scotland)\)('')/g, '$1$3 ($2)');
         regex(/(\|)(Daily Record|Sunday Mail)(?: \((Scotland)\))(\]\])/g, '$1$2$4|location=$3');
         regex(/(\[\[)(Daily Record|Sunday Mail)( \((Scotland)\))(\]\])/g, '$1$2$3|$2$5|location=$4');
         regex(/(=[ ]*Dawn) \((newspaper)\)(\s*[|}])/g, '$1|location=Pakistan$3');
         regex(/(''Dawn) \((newspaper)\)('')/g, '$1$3 (Pakistan)');
         regex(/(\|)(Dawn)(?: \(newspaper\))(\]\])/g, '$1$2$3');

         regex(/(=[ ]*(?:Daily Star)) \((United Kingdom)\)(\s*[|}])/g, '$1|location=$2$3');
         regex(/(''(?:Daily Star)) \((United Kingdom)\)('')/g, '$1$3 ($2)');
         regex(/(\|)(Daily Star)(?: \(United Kingdom\))(\]\])/g, '$1$2$3');
         regex(/(=[ ]*El Mundo) \((Columbia|Spain)\)(\s*[|}])/g, '$1|location=$2$3');
         regex(/(''El Mundo) \((Columbia|Spain)\)('')/g, '$1$3 ($2)');
         regex(/(\|)(El Mundo)(?: \((?:Columbia|Spain)\))(\]\])/g, '$1$2$3');
         regex(/(=[ ]*The Daily Star) \((Lebanon)\)(\s*[|}])/g, '$1|location=$2$3');
         regex(/(''The Daily Star) \((Lebanon)\)('')/g, '$1$3 ($2)');
         regex(/(\|)(The Daily Star)(?: \(Lebanon\))(\]\])/g, '$1$2$3');
         regex(/(=[ ]*The Gazette) \((Montreal)\)(\s*[|}])/g, '$1|location=$2$3');
         regex(/(''The Gazette) \((Montreal)\)('')/g, '$1$3 ($2)');
         regex(/(\|)(The Gazette)(?: \(Montreal\))(\]\])/g, '$1$2$3');
         regex(/(=[ ]*The Herald) \((Glasgow)\)(\s*[|}])/g, '$1|location=$2$3');
         regex(/(''The Herald) \((Glasgow)\)('')/g, '$1$3 ($2)');
         regex(/(\|)(The Herald)(?: \(Glasgow\))(\]\])/g, '$1$2$3');
         regex(/(=[ ]*Metro) \((British newspaper)\)(\s*[|}])/g, '$1|location=UK$3');
         regex(/(''Metro) \((British newspaper)\)('')/g, '$1$3 (UK)');
         regex(/(\|)(Metro)(?: \(British newspaper\))(\]\])/g, '$1$2$3');
         regex(/(=[ ]*The Nation) \((Malawi|Nigeria|Pakistan|Thailand)\)(\s*[|}])/g, '$1|location=$2$3');
         regex(/(''The Nation) \((Malawi|Nigeria|Pakistan|Thailand)\)('')/g, '$1$3 ($2)');
         regex(/(\|)(The Nation)(?: \((?:Malawi|Nigeria|Pakistan|Thailand)\))(\]\])/g, '$1$2$3');
         regex(/(=[ ]*The National) \((Abu Dhabi)\)(\s*[|}])/g, '$1|location=$2$3');
         regex(/(''The National) \((Abu Dhabi)\)('')/g, '$1$3 ($2)');
         regex(/(\|)(The National)(?: \(Abu Dhabi\))(\]\])/g, '$1$2$3');
         regex(/(=[ ]*La Presse) \((Canadian newspaper)\)(\s*[|}])/g, '$1|location=Canada$3');
         regex(/(''La Presse) \((Canadian newspaper)\)('')/g, '$1$3 (Canada)');
         regex(/(\|)(La Presse)(?: \(Canadian newspaper\))(\]\])/g, '$1$2$3');
         regex(/(=[ ]*Les Échos) \((France)\)(\s*[|}])/g, '$1|location=$2$3');
         regex(/(''Les Échos) \((France)\)('')/g, '$1$3 ($2)');
         regex(/(\|)(Les Échos)(?: \(France\))(\]\])/g, '$1$2$3');
         regex(/(=[ ]*Panorama) \((Gibraltar)\)(\s*[|}])/g, '$1|location=$2$3');
         regex(/(''Panorama) \((Gibraltar)\)('')/g, '$1$3 ($2)');
         regex(/(\|)(Panorama)(?: \(Gibraltar\))(\]\])/g, '$1$2$3');
         regex(/(=[ ]*The Pioneer) \((Indian newspaper)\)(\s*[|}])/g, '$1|location=India$3');
         regex(/(''The Pioneer) \((Indian newspaper)\)('')/g, '$1$3 (India)');
         regex(/(\|)(The Pioneer)(?: \(Indian newspaper\))(\]\])/g, '$1$2$3');
         regex(/(=[ ]*Sporting Life) \((British newspaper)\)(\s*[|}])/g, '$1|location=UK$3');
         regex(/(''Sporting Life) \((British newspaper)\)('')/g, '$1$3 (UK)');
         regex(/(\|)(Sporting Life)(?: \(British newspaper\))(\]\])/g, '$1$2$3');
         regex(/(=[ ]*The Standard) \((Hong Kong|Kenya)\)(\s*[|}])/g, '$1|location=$2$3');
         regex(/(''The Standard) \((Hong Kong|Kenya)\)('')/g, '$1$3 ($2)');
         regex(/(\|)(The Standard)(?: \((?:Hong Kong|Kenya)\))(\]\])/g, '$1$2$3');
         regex(/(=[ ]*The Star) \((Malaysia)\)(\s*[|}])/g, '$1|location=$2$3');
         regex(/(''The Star) \((Malaysia)\)('')/g, '$1$3 ($2)');
         regex(/(\|)(The Star)(?: \(Malaysia\))(\]\])/g, '$1$2$3');
         regex(/(=[ ]*The Sun) \((Hong Kong|Malaysia|Nigeria|United Kingdom)\)(\s*[|}])/g, '$1|location=$2$3');
         regex(/(''The Sun) \((Hong Kong|Malaysia|Nigeria|United Kingdom)\)('')/g, '$1$3 ($2)');
         regex(/(\|)(The Sun)(?: \((?:Hong Kong|Malaysia|Nigeria|United Kingdom)\))(\]\])/g, '$1$2$3');
         regex(/(=[ ]*The Telegraph) \(Calcutta\)(?=\s*[|}])/g, '$1|location=Kolkota');
         regex(/(''The Telegraph) \((Calcutta)\)('')/g, '$1$3 ($2)');
         regex(/(\|)(The Telegraph)(?: \(Calcutta\))(\]\])/g, '$1$2$3');
         regex(/(=[ ]*The Daily Telegraph) \(Australia\)(?=\s*[|}])/g, '$1|location=Australia');
         regex(/(''The Daily Telegraph) \((Australia)\)('')/g, '$1$3 (Australia)');
         regex(/(\|)(The Daily Telegraph)(?: \(Australia\))(\]\])/g, '$1$2$3');
         regex(/(=[ ]*(?:The Times)) \((Malta)\)(\s*[|}])/g, '$1|location=$2$3');
         regex(/(''(?:The Times)) \((Malta)\)('')/g, '$1$3 ($2)');
         regex(/(\|)(The Times)(?: \(Malta\))(\]\])/g, '$1$2$3');
         regex(/(=[ ]*Vogue) \((British magazine)\)(\s*[|}])/g, '$1|location=UK$3');
         regex(/(''Vogue) \((British magazine)\)('')/g, '$1$3 (UK)');
         regex(/(\|)(Vogue)(?: \(British magazine\))(\]\])/g, '$1$2$3');

         regex(/(= ?(?:ABC|Marca)) \((newspaper)\)([ ]*[|}])/g, '$1|location=Spain$3');
         regex(/(''(?:ABC|Marca)) \((newspaper)\)('')/g, '$1$3 (Spain)');
         regex(/(\|)(ABC|Marca)(?: \(newspaper\))(\]\])/g, '$1$2$3');
         regex(/(= ?(?:Il Giorno)) \((newspaper)\)([ ]*[|}])/g, '$1|location=Italy$3');
         regex(/(''(?:Il Giorno)) \((newspaper)\)('')/g, '$1$3 (Italy)');
         regex(/(\|)(Il Giorno)(?: \(newspaper\))(\]\])/g, '$1$2$3');
  
         regex(/(= ?(?:RT)) \((TV network)\)([ ]*[|}])/g, '$1|location=Russia$3');
         regex(/(\|)(RT)(?: \(TV network\))(\]\])/g, '$1$2$3');

         regex(/(=[ ]*\[\[)([^\[\]\(\)\|:]*)( \([^\[\]\(\)\|]*\))(\]\])/gi, '$1$2$3|$2$4');	//adding piping to parentheticals inside parameters

         regex(/(?:author|agency|publisher)(\s*\=\s*\[\[[^()|]+ \((?:newspaper|magazine)\)\|[^\[\]|]+\]\])/gi, 'work$1');

// removing redundancies
         regex(/ – (?:Times of India|Rediff.com [\w]*)(?=[ ]?\|)/gi, '');
         regex(/(?: +[‒–—―] *Times Of India|)(\]. +''The Times of India''\.)indiatimes\.com/gi, '$1');
         regex(/(\w''\.)indiatimes\.com/gi, '$1');

         regex(/\|[ ]?language[ ]?\=[ ]?English[ ]*?(?=[|}\n])/gi, '');  //note: adjusted for false positive in infoboxes books
         regex(/- [\w]*\.com[ ]*\|/gi, '|');
         regex(/(.) – Google [^ \]]*(\][\.,;]) Books\.google\.\w{2,3}(\.| )/gi, '$1$2Google Books$3');
         regex(/(.) at Discogs(\][\.,;]) Discogs\.com(\.| )/gi, '$1$2Discogs$3');
         regex(/\|\s?author\s?\=(?:posted|publishe[dr]|written)\s?(?:by|on)\s/gi, '|author=');
         regex(/\|\s?(?:work|publisher)(\s?\=MTV)\|\s?publisher\s*\=\s*(?:MTV Networks|Viacom)/gi, '|publisher$1=');

         regex(/\|\s?(?:publisher|work)\s*\=\s*(?:BBC|BBC News(?: Online|))\s*(\|[^}<>]*|)\|\s?publisher\s*\=\s*(?:BBC|BBC News(?: Online|)|British Broadcasting Corporation)(?=[\s\.]*[|}])/g, '|publisher=BBC News $1');
         regex(/\|\s?(?:publisher|work)\s*\=\s*(BBC Sports?)\s*(\|[^}<>]*|)\|\s?publisher\s*\=\s*(?:BBC|BBC News(?: Online|)|British Broadcasting Corporation)(?=[\s\.]*[|}])/g, '|publisher=$1$2');

    //rem duplicated publishers in separate fields (post); rem preceding nbsp
	txt.value=txt.value.replace(/(?:[‒–—―]+|&#124;)\s*(?:The |)([^\|\}&]{3,})(?:\.com|)\s*(\|\s?(?:publisher|work)\s*\=\s*)\1(?=\s*[|}])/gi, '$2$1');
	txt.value=txt.value.replace(/\s?&nbsp;\s?(\|\s?(?:publisher|work)\s*\=\s*)/gi, ' $1');

//per [[Help:Citation Style 1#Elements not included]]
         regex(/(?:-[ ]*Google Books[ ]*(\|[^}]*|)|)\|\s?publisher\s*\=\s*Google Books(?=[\s\.]*[|}])/g, '');
         regex(/\|\s?publisher\s*\=\s*(?:Project Gutenberg|Proquest|Scribd|web(?:\.archive\|citation).org)(?=[\s\.]*[|}])/g, '');

// reordering 'work' and 'publisher'; reordering 'work' and 'website'
         regex(/(\|\s?publisher\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(\s?\|[^}<>]*|)(\|\s?(?: journal|newspaper|magazine|periodical|website|work)\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(?=[\s\.]*[|}])/g, '$3$1$2');
         regex(/(\|\s?website\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(\s?\|[^}<>]*|)(\|\s?work\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(?=[\s\.]*[|}])/g, '$3$1$2');

// removing identical/similar entries in 'work' and 'publisher'
         regex(/\|\s?work\s*\=\s*([^\|\}<>]*)(\s?\|[^}<>]*|)\|\s?(?:publisher|work)\s*\=\s*(?:\1|\[\[\1\]\])(?=[\s\.]*[|}])/g, '|work=$1$2');
         regex(/\|\s?work\s*\=\s*(\[\[(?:[^<\|\]]*)\]\]|[^<\|\]]*)(\s?\|[^}<>]*|)\|\s?(?:publisher|work)\s*\=\s*\1(?=[\s\.]*[|}])/g, '|work=$1$2');

         regex(/\|\s?publisher\s*\=\s*([^\[\]|}<>]*)(\s?\|[^}<>]*|)\|\s?(?:publisher|work)\s*\=\s*(?:\1|\[\[\1\]\])(?=[\s\.]*[|}])/g, '|work=$1$2');
         regex(/\|\s?publisher\s*\=\s*(\[\[(?:[^|\]]*)\]\]|[^<\|\]\}]*)(\s?\|[^}<>]*|)\|\s?(?:publisher|work)\s*\=\s*\1(?=[\s\.]*[|}])/g, '|work=$1$2');

         regex(/\|\s?location\s*\=\s*New York(?: City|)\s*(\|[^}<>]*|)\|\s?location\s*\=\s*(New York(?: City|)|USA)(?=[\s\.]*[|}])/g, '|location=New York $1');

         regex(/(?:\|[ ]*?location=[^\|]*)(\|[ ]*?location=[^|}]*)(?=[|}\n])+/gi, '$1');
         regex(/(\|[ ]*?publisher=[^=}]*)(?:\|[ ]*?publisher=[^=}]*)(?=[|}\n])+/gi, '$1');
         regex(/(\|[ ]*?work=[^=}]*)(?:\|[ ]*?work=[^=}]*)(?=[|}\n])+/gi, '$1');

    //'work' and its alias (pre)
         regex(/(\|[ ]*?newspaper[ ]*=[^\|}]*(?:\|[^\{\}]*|))(?:\|[ ]*?work[ ]*=[^|}]*)(?=[|}\n])+/gi, '$1');

//unwinding of unnecessary pipes
         regex(/\[\[([^\]\|]*)\|\1(?=\]\])/gi, '[[$1');

// removing artefacts (within citation templates)
         regex(/(\|[ ]*?author=)(?:(?:Mon|Tues|Wednes|Thurs|Fri|Satur|Sun)day,? ?)(?=[^\]\|\}]*\|)/gi, '$1');
         regex(/(?:&#124; (?:Daily|English|(?:Mail |)Online|Music|News|Indian Express))(?=\s*\|)+/gi, '');

         regex(/(?:(?:[ ]+&#124;[ ]+HighBeam Research[ ]+[-–][ ]+FREE trial[ ]+|)\|publisher=Highbeam.com)/gi, '');
         regex(/( &#124; [\w, ]*?)(?=[ ]&#124)/gi, '');
         regex(/( &#124; Comment is free)/gi, '');
         regex(/\|\s?title\s*\=\s*BBC (?:News|Sport)\s?(?:–|&#124; )\s?/gi, '|title=');
         regex(/(?:Football|international|Latest|local|UK|world|) News &#124;(?=[ ]?[&\|])/gi, '');
         regex(/(?:[-–|]|&#124;)[ ]*(?:Football|international|Latest|local|UK|world|) News(?=[ ]?[&\|])/gi, '');
         regex(/<!-- Bot generated title -->/gi, '');

         regex(/(DOC|PDF)\) \./gi, '$1).');

// removing artefacts (outside of citation templates)
         regex(/([\w]+\'\')\.(?:co(?:m|m?\.\w{2})|\.\w{2})[ ]/gi, '$1. ');

// removing other artefacts 
         regex(/(UEFA\]\])\.(?:co(?:m|m?\.\w{2})|\.\w{2})(?= ?[\|{}])/gi, '$1');

//dynamic columns for reflists; remove scroll bar
         regex(/((?:[Rr]eferences|[Nn]otes)[ ]?={2,4}[\n\r])[\r\n\s]*<div (?:style|class)=[^>]*>([\S\s]*)<\/div>/g, '$1$2'); 	
         regex(/(?:\{\{[Rr]eflist\}\}|<[Rr]eferences ?\/>)/g, '{{reflist|colwidth=30em}}');

}

function Ohc_sourceunlink() {
    // Strips [[wikilinks]] from source names, first inside citation-template
    // parameters and then inside <ref> tags, by running a fixed list of
    // search/replace rules through regex() in order.

    // Not referenced again in this function; the lookup is kept so the call
    // still fails early when the page has no edit form.
    var editBox = document.editform.wpTextbox1;

    var unlinkRules = [
        // Plain links in author/agency/publisher/journal/newspaper/periodical/work
        // values: [[Name]] (optionally followed by ", [[Second name]]") -> bare text.
        [/(\|[ ]*?(?:author|agency|publisher|journal|newspaper|periodical|work)=[ ]*?)\[\[([^\|\]]+?)\]\](?:(,? )\[\[([^\|\]]+?)\]\]|)([ ]{0,1})/gi, '$1$2$3$4$5'],
        // Piped links in the same parameters: [[Target|Label]] -> Label.
        [/(\|[ ]*?(?:author|agency|publisher|journal|newspaper|periodical|work)=[ ]*?)\[\[(?:[^\|\]]+?\|)([\w\s\,]+?)\]\](?:(,? )\[\[([^\|\]]+?)\]\]|)([ ]{0,1})/gi, '$1$2$3$4$5'],
        // Links inside <ref>...</ref>, plain and italicised ('') variants.
        // NOTE(review): both patterns only match when the closing </ref> is
        // directly followed by ") ." -- this looks unintended; confirm.
        [/(<ref[^>]*>[^<]+?[\]\.,;]\s+)\[\[(?:[^\|\]<]*\||)([^\|\]<]*)(?: online|)\]\]([^<]*?<\/ref>)\) \./gi, '$1$2$3).'],
        [/(<ref[^>]*>[^<]+?[\]\.,;]\s+\'\')\[\[(?:[^\|\]<]*\||)([^\|\]<]*)(?: online|)\]\](\'\'[^<]*?<\/ref>)\) \./gi, '$1$2$3).']
    ];

    for (var i = 0; i < unlinkRules.length; i++) {
        regex(unlinkRules[i][0], unlinkRules[i][1]);
    }
}

/** ------------------------------------------------------------------------ **/
/// PROTECTION BY STRING SUBSTITUTION

// Store for protected text; a placeholder ⍌n⍍ in the page stands for linkmap[n].
var linkmap=[];
function ohc_protect_linkspam()
{
    // Shields text from later regex passes: the middle capture of each
    // pattern below is moved into linkmap and replaced in the page by a
    // numbered placeholder, to be swapped back afterwards.

    // Replacement callback: keeps the first and last captures in place and
    // substitutes the placeholder for the middle one.
    function stash(whole, prefix, payload, suffix) {
        var slot = linkmap.push(payload) - 1;
        return prefix + "⍌" + slot + "⍍" + suffix;
    }

    var shielded = [
        // Everything between a <ref ...> tag and the next </ref>.
        // NOTE(review): "." does not match line breaks, so a reference that
        // spans several lines is not matched here -- confirm this is intended.
        /(<ref[^>]*?>)(.*?)(<\/ref>)/gi,
        // Bulleted external links: "* [http:...]", "* [https:...]", "* [ftp:...]".
        /(\*[ ]?\[(?:https?:|ftp:))([^\]]*)(\])/gi
    ];

    for (var i = 0; i < shielded.length; i++) {
        regex(shielded[i], stash);
    }
}


function ohc_unprotect_linkspam()
{
    // Puts back the text that was swapped out for ⍌n⍍ placeholders: every
    // placeholder is replaced by linkmap[n], and placeholders found inside the
    // restored text are expanded in turn.
    //
    // Differences from the earlier hand-unrolled version:
    //  - nesting is followed to any depth instead of stopping after four levels
    //    (deeper placeholders used to be left in the page);
    //  - a placeholder whose index has no stored text is left untouched instead
    //    of raising a TypeError or inserting the text "undefined".

    // Replacement callback: token is the whole placeholder, index its digits.
    var restore = function(token, index) {
        var slot = parseInt(index, 10);
        var stored = linkmap[slot];
        if (typeof stored !== 'string') {
            return token;
        }
        // A fresh regex literal is used for each nested pass so no matching
        // state is shared between levels.
        return stored.replace(/⍌([0-9]+)⍍/g, function(innerToken, innerIndex) {
            // Stored text is captured before its own slot is assigned, so it can
            // only contain lower-numbered placeholders; anything else is left
            // alone, which also guarantees the recursion terminates.
            if (parseInt(innerIndex, 10) >= slot) {
                return innerToken;
            }
            return restore(innerToken, innerIndex);
        });
    };

    regex(/⍌([0-9]+)⍍/g, restore);
}

// Store for protected text; a placeholder ⍌n⍍ in the page stands for linkmap[n].
// (This file declares the same global more than once; with `var` every
// declaration refers to the one array.)
var linkmap=[];
function ohc_protect_urls()
{
    // Shields URLs and a few citation fragments from later regex passes.
    // The middle capture of each pattern is stored in linkmap and replaced
    // in the page by a numbered placeholder, which is later swapped back for
    // the stored text.

    // Replacement callback: s is the whole match; begin and end stay in the
    // page, replace is the part that gets stored.
    var protect_function = function(s, begin, replace, end) {
        linkmap.push(replace);
        return begin + "⍌"+(linkmap.length-1)+"⍍" + end;
    };

    // URLs that follow "[" or "=": everything after the scheme up to the next "]", "|" or "}".
    regex(/((?:[\[=]\s*)(?:https?:|ftp:))([^\]\|\}]*)(\s*[\]|}])/gi, protect_function);
    // Whole parameter list of {{harv...}}, {{sfn...}} and {{cite book}} templates, up to the first "}".
    regex(/(\{\{(?:harv\w*|sfn\w*|cite ?book)\s?\|)([^\}]+)(\})/gi, protect_function);
    // Value of a |contribution= parameter, up to the next "|" or "}".
    // The terminator used to be the two-character sequence "|}", which the value
    // class [^|}]+ can only meet when an empty parameter directly precedes the
    // closing brace, so ordinary contribution values were never protected.
    regex(/(\|\s*contribution\s*=)([^|}]+)([|}])/gi, protect_function);

}


function ohc_unprotect_urls()
{
    // Puts back the text that was swapped out for ⍌n⍍ placeholders: every
    // placeholder is replaced by linkmap[n], and placeholders found inside the
    // restored text are expanded in turn.
    //
    // Differences from the earlier hand-unrolled version:
    //  - nesting is followed to any depth instead of stopping after four levels
    //    (deeper placeholders used to be left in the page);
    //  - a placeholder whose index has no stored text is left untouched instead
    //    of raising a TypeError or inserting the text "undefined".

    // Replacement callback: token is the whole placeholder, index its digits.
    var restore = function(token, index) {
        var slot = parseInt(index, 10);
        var stored = linkmap[slot];
        if (typeof stored !== 'string') {
            return token;
        }
        // A fresh regex literal is used for each nested pass so no matching
        // state is shared between levels.
        return stored.replace(/⍌([0-9]+)⍍/g, function(innerToken, innerIndex) {
            // Stored text is captured before its own slot is assigned, so it can
            // only contain lower-numbered placeholders; anything else is left
            // alone, which also guarantees the recursion terminates.
            if (parseInt(innerIndex, 10) >= slot) {
                return innerToken;
            }
            return restore(innerToken, innerIndex);
        });
    };

    regex(/⍌([0-9]+)⍍/g, restore);
}

/** ------------------------------------------------------------------------ **/
function Ohc_Source_edit_summary(){
    // Final step of a source-formatting run: hands the option flag to
    // setoptions(), appends the script's standard edit-summary text via
    // setreason(), then triggers the 'diff' action through doaction().
    // All three helpers are defined outside this file.

    // The flag is passed as a plain argument. The earlier form,
    // setoptions(minor='true'), was an assignment expression: it passed the
    // same value but also created an implicit global named `minor`, and it
    // throws a ReferenceError in strict-mode code.
    // NOTE(review): assumes setoptions() takes the flag from its first
    // parameter and does not read a global `minor` -- confirm against its definition.
    setoptions('true');
    setreason('per [[Help:Citation Style 1|CS1]], [[Template:Citation]] and [[MOS:ITALICS]] by [[User:Ohconfucius/script|script]]', 'append');
    doaction('diff');
}

function Ohc_linkspam_driver() {
    // Entry point for the "Linkspam" toolbox link: runs Ohc_linkspam()
    // between the protect/unprotect pair, then appends the edit-summary note.

    // Not referenced again in this function; the lookup is kept so the call
    // still fails early when the page has no edit form.
    var editBox = document.editform.wpTextbox1;

    // 1. swap protected text for placeholders
    ohc_protect_linkspam();

    // 2. run the removal rules (defined outside this chunk)
    Ohc_linkspam();

    // 3. swap the placeholders back
    ohc_unprotect_linkspam();

    setreason('rem [[WP:Linkspam|linkspam]]', 'append');
}

function Ohc_ref_format_new() {
    // Entry point for the "Fix SOURCES" toolbox link: runs every stage of the
    // source-formatting module in a fixed order, then finishes with the
    // edit-summary step. The stages are order-dependent; do not rearrange.

    // Not referenced again in this function; the lookup is kept so the call
    // still fails early when the page has no edit form.
    var editBox = document.editform.wpTextbox1;

    // URL removal runs before the remaining URLs are swapped for placeholders.
    Ohc_remove_urls();
    ohc_protect_urls();

    // Preparation passes.
    Ohc_sources_prep();
    ohc_foreign_dates();
    Ohc_unpipe();
    Ohc_dab_news_sources();

    // Source-name, work, publisher and agency passes.
    Ohc_sourcename();
    Ohc_sourcework();
    Ohc_sourcepub();
    Ohc_sourceagency();

    // Re-piping, publisher removal and final cleanup.
    Ohc_redo_pipe();
    Ohc_publishers();
    Ohc_sources_cleanup();

    // Placeholders are swapped back only after every pass has run.
    ohc_unprotect_urls();

    Ohc_Source_edit_summary();
}

function Ohc_noverify() {
    // Entry point for the "Unverifiable" toolbox link: removes references and
    // url parameters that point at Wikipedia articles or at facebook/myspace/
    // twitter, then turns remaining bracketed en.wikipedia.org links into
    // [[wikilinks]]. Rules run through regex() in the order listed.

    // Not referenced again in this function; the lookup is kept so the call
    // still fails early when the page has no edit form.
    var editBox = document.editform.wpTextbox1;

    var rules = [
        // <ref> containing a template whose |url= points at such a site: drop the whole ref.
        [/<ref[^<>]*>[^<>]*\|[ ]*url ?=https?:\/\/(?:\w{2}\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace|twitter)\.com)\/[^<>]*<\/ref>/gi, ''],
        // <ref> holding only a bare URL to such a site: drop the whole ref.
        [/<ref>https?:\/\/(?:en\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace|twitter)\.com)\/[^\s\]<]*<\/ref>/gi, ''],
        // <ref> holding only a bracketed, labelled link to such a site: drop the whole ref.
        [/<ref>\[https?:\/\/(?:en\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace|twitter)\.com)\/[^\s\]]*[ ]+[\w\d][^\]]*\]<\/ref>/gi, ''],
        // Any remaining |url= parameter pointing at such a site: drop just the parameter.
        [/\|[ ]*url[ ]*=[ ]*https?:\/\/(?:\w{2}\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace|twitter)\.com)[^\s\|\{\}<]*(?=[ ]*[|}])/gi, ''],
        // " [https://en.wikipedia.org/wiki/Page Label]" -> " [[Label]]" (the label becomes the link text).
        [/[ ]\[https?:\/\/en\.wikipedia\.org\/wiki\/[^\s\]]*[ ]+([\w][^\]]*)\]/gi, ' [[$1]]']
    ];

    for (var i = 0; i < rules.length; i++) {
        regex(rules[i][0], rules[i][1]);
    }
}


addOnloadHook(function () {
    // Registers this module's toolbox ('p-tb') links once the page has loaded.
    // Nothing is added on pages without an edit form.
    if (!document.forms.editform) {
        return;
    }

    // Each row: [href, link text, element id, hover text].
    // NOTE(review): 't-citefix' is used as the id of two links, and the last
    // row's id and hover text are both 'sort' -- confirm whether intended.
    var toolboxLinks = [
        ['javascript:Ohc_linkspam_driver()', 'Linkspam', 'rm-linkspam', 'removes linkspam in running text'],
        ['javascript:Ohc_sourceunlink()', 'Unlink source name', 't-citefix', 'Unlinks source'],
        ['javascript:Ohc_ref_format_new()', 'Fix SOURCES', 't-citefix', 'Run entire new module'],
        ['javascript:Ohc_noverify()', 'Unverifiable', 'sort', 'sort']
    ];

    for (var i = 0; i < toolboxLinks.length; i++) {
        var entry = toolboxLinks[i];
        addPortletLink('p-tb', entry[0], entry[1], entry[2], entry[3], '', '');
    }

    // Template for a new link, and disabled links for running single stages:
//  addPortletLink('p-tb', 'javascript:function_name()', 'Button name', 't-dmy', 'Hover text', '', '');
//  addPortletLink('p-tb', 'javascript:Ohc_remove_urls()', 'Remove certain urls', 't-citefix', '0.Improper urls', '', '');
//  addPortletLink('p-tb', 'javascript:Ohc_sources_prep()', 'Prepare sources', 't-citefix', '1.Prepares sources', '', '');
//  addPortletLink('p-tb', 'javascript:Ohc_unpipe()', 'Unpipe sources', 't-citefix', '2.Unpipe linked sources', '', '');
//  addPortletLink('p-tb', 'javascript:Ohc_dab_news_sources()', 'Applies DAB', 't-citefix', '3.Applies disambiguation', '', '');
//  addPortletLink('p-tb', 'javascript:Ohc_sourcename()', 'Align source name', 't-citefix', '4.Corrects source name – subscript1', '', '');
//  addPortletLink('p-tb', 'javascript:Ohc_sourcework()', 'Cleanup WORK', 't-citefix', '5.Cleanup and reclassifies as WORK – subscript2', '', '');
//  addPortletLink('p-tb', 'javascript:Ohc_sourcepub()', 'Cleanup publisher', 't-citefix', '6.Cleanup and reclassifies as PUBLISHER – subscript2', '', '');
//  addPortletLink('p-tb', 'javascript:Ohc_sourceagency()', 'Cleanup agency', 't-citefix', '7.Cleanup and reclassifies as AGENCY – subscript2', '', '');
//  addPortletLink('p-tb', 'javascript:Ohc_redo_pipe()', 'Repipe sources', 't-citefix', '8.Repipe ambiguous links', '', '');
//  addPortletLink('p-tb', 'javascript:Ohc_publishers()', 'Rem publishers', 't-citefix', '9.Removing certain publishers fields', '', '');
//  addPortletLink('p-tb', 'javascript:Ohc_sources_cleanup()', 'Final cleanup', 't-citefix', '10.Cleanup after script actions', '', '');
});