Jump to content

User:FlightTime/linkclassifier.js

From Wikipedia, the free encyclopedia
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
/* If you want to use this script, simply add the following line to your [[Special:Mypage/monobook.js]]:

importScript('User:FlightTime/linkclassifier.js'); // Linkback: [[User:FlightTime/linkclassifier.js]]

* (Please keep the comment so I can see how many people use this). You will also want to
* add some CSS classes, such as those at [[User:FlightTime/linkclassifier.css]].
*/

/* If you want this to run "on demand" instead of on every page, set "LinkClassifierOnDemand=true" and
 * use addPortletLink() or the like to add a button calling LinkClassifier.onDemand().
 */

var LinkClassifier = {
	/* This object maps CSS classes to the categories for which to apply them. Values may be an array of
	 * category titles or a regex. The arrays are sorted because callback() walks them in step with the
	 * sorted category list of each linked page. */
	cats: {
		deletion: [
			'Category:All articles proposed for deletion',
			'Category:All books proposed for deletion',
			'Category:All categories for discussion',
			'Category:All disputed non-free Wikipedia files',
			'Category:All files proposed for deletion',
			'Category:All orphaned non-free use Wikipedia files',
			'Category:All redirects for discussion',
			'Category:All replaceable non-free use Wikipedia files',
			'Category:All Wikipedia files with no non-free use rationale',
			'Category:All Wikipedia files with unknown copyright status',
			'Category:All Wikipedia files with unknown source',
			'Category:Articles for deletion',
			'Category:Articles for deletion using wrong syntax',
			'Category:Articles on deletion review',
			'Category:Articles to be merged after an Articles for deletion discussion',
			'Category:Candidates for speedy deletion',
			'Category:Candidates for undeletion',
			'Category:Categories for conversion',
			'Category:Categories for deletion',
			'Category:Categories for listifying',
			'Category:Categories for merging',
			'Category:Categories for renaming',
			'Category:Categories for speedy renaming',
			'Category:Categories to be listified then deleted',
			'Category:Empty categories awaiting deletion',
			'Category:Items pending OTRS confirmation of permission for over 30 days',
			'Category:Miscellaneous pages for deletion',
			'Category:Templates for deletion',
			'Category:Templates for merging',
			'Category:Wikipedia files for discussion'
		].sort(),
		disambiguation: [
			'Category:All disambiguation pages'
		].sort(),
		'set-index': [
			'Category:All set index articles'
		].sort(),
		'featured-content': [
			'Category:Featured articles',
			'Category:Featured lists',
			'Category:Featured pictures',
			'Category:Featured sounds',
			'Category:Featured videos',
			'Category:Featured portals'
		].sort(),
		'good-content': [
			'Category:Good articles'
		].sort(),
		'soft-redirect-cats': [
			'Category:Wikipedia soft redirected categories'
		].sort(),
		'spoken-articles': [
			'Category:Spoken articles'
		].sort(),
		stubcls: /^Category:.* stubs$/,
		'nonfree-media': [
			'Category:All non-free media'
		].sort(),
		unprintworthy: [
			'Category:Unprintworthy redirects',
			'Category:Middle-earth redirects from redundant titles'
		].sort(),
		'unprintworthy-shortcut': [
			'Category:Redirects from shortcuts'
		].sort(),
		'incorrect-title': [
			'Category:Redirects from incorrect disambiguation',
			'Category:Redirects from incorrect names',
			'Category:Redirects from miscapitalisations',
			'Category:Redirects from misspellings'
		].sort()
	},

	/* This object maps page props to CSS classes for which to apply them. Values may be an array of strings or a function returning such. */
	props: {
		disambiguation: [
			'disambiguation'
		]
	},

	/* This regex matches page titles to be marked as intentional links to disambiguation pages */
	intentionaldab: / \(disambiguation\)$/,

	/* Was it run already? */
	wasRun: false,

	/* Has classifyChildren already been registered with AJAXPreview? */
	ajaxPreviewHooked: false,

	/* Error handler shared by every API request: logs the failure through mw.log. */
	onAjaxError: function ( xhr, textStatus, errorThrown ) {
		mw.log.error( 'AJAX error: ' + textStatus + ' ' + errorThrown );
	},

	/* Success handler for the main API queries.
	 *
	 * r is the decoded JSON response. The handler relies on `this` being the $.ajax settings object
	 * (jQuery's default callback context), so `this.rawdata` is the query that produced the response.
	 *
	 * Steps: re-issue the query if the API asks for continuation, record redirects and look up the
	 * redirect pages themselves in a follow-up query, derive CSS classes from page props, protection
	 * and flagged-revision data, and finally apply classes to every link under the content node.
	 *
	 * Throws an Error when the response is unusable and no console is available, or when no content
	 * node can be found.
	 */
	callback: function ( r ) {
		var i, j, k, k2, v, node, alist, q, prefix, seen, cls, selfTitle,
			// Prototype-less maps: the keys are arbitrary page titles, so a title such as
			// "constructor" must not resolve to a member inherited from Object.prototype.
			redir = Object.create( null ),
			redirlist = [],
			cats = Object.create( null ),
			missing = Object.create( null ),
			classes = Object.create( null );

		if ( !r || !r.query ) {
			if ( !window.console || typeof window.console.error !== 'function' ) {
				throw new Error( 'Bad response' );
			}
			window.console.error( 'Bad response', r );
			return;
		}
		if ( r['query-continue'] ) {
			// Raw continuation: merge the continue parameters into the original query and send it again.
			q = this.rawdata;
			for ( k in r['query-continue'] ) {
				for ( k2 in r['query-continue'][k] ) {
					q[k2] = r['query-continue'][k][k2];
				}
			}
			$.ajax( {
				url: mw.util.wikiScript( 'api' ),
				dataType: 'json',
				type: 'POST',
				data: q,
				rawdata: this.rawdata,
				success: LinkClassifier.callback,
				error: LinkClassifier.onAjaxError
			} );
		}
		r = r.query;

		node = document.getElementById( 'wikiPreview' );
		if ( !node ) {
			node = document.getElementById( 'bodyContent' );
		}
		if ( !node ) {
			throw new Error( 'Huh? No body content?' );
		}
		alist = node.getElementsByTagName( 'A' );
		if ( alist.length === 0 ) {
			return;
		}

		if ( r.redirects ) {
			for ( i = r.redirects.length - 1; i >= 0; i-- ) {
				redir[r.redirects[i].from] = r.redirects[i].to;
				redirlist.push( r.redirects[i].from );
			}
		}
		if ( redirlist.length > 0 ) {
			// The first query resolved redirects, so it described the targets. Ask again without
			// "redirects" to get the categories and protection of the redirect pages themselves.
			q = {
				format: 'json',
				action: 'query',
				titles: redirlist.join( '|' ),
				prop: 'categories|info',
				inprop: 'protection',
				cllimit: 'max',
				rawcontinue: 1
			};
			$.ajax( {
				url: mw.util.wikiScript( 'api' ),
				dataType: 'json',
				type: 'POST',
				data: q,
				rawdata: q,
				success: LinkClassifier.callback,
				error: LinkClassifier.onAjaxError
			} );
		}

		// Protection classes for the redirect page itself (query sent without "redirects") get a prefix.
		prefix = this.rawdata.redirects ? '' : 'redir-';
		if ( r.pages ) {
			for ( i in r.pages ) {
				classes[r.pages[i].title] = [];
				missing[r.pages[i].title] = r.pages[i].missing !== undefined;
				if ( r.pages[i].categories ) {
					cats[r.pages[i].title] = r.pages[i].categories.map( function ( a ) {
						return a.title;
					} ).sort();
				}
				if ( r.pages[i].pageprops ) {
					for ( k in r.pages[i].pageprops ) {
						if ( !LinkClassifier.props[k] ) {
							continue;
						}
						v = LinkClassifier.props[k];
						if ( typeof v === 'function' ) {
							v = v( r.pages[i].pageprops[k], k, r.pages[i].title );
						}
						classes[r.pages[i].title].push.apply( classes[r.pages[i].title], v );
					}
				}
				if ( r.pages[i].protection ) {
					// "seen" keeps each protection class from being pushed twice for one page.
					seen = {};
					for ( j = r.pages[i].protection.length - 1; j >= 0; j-- ) {
						cls = prefix + 'protection-' + r.pages[i].protection[j].type + '-' + r.pages[i].protection[j].level;
						if ( !seen[cls] ) {
							seen[cls] = 1;
							classes[r.pages[i].title].push( cls );
						}
						if ( r.pages[i].protection[j].expiry === 'infinity' ) {
							cls += '-indef';
							if ( !seen[cls] ) {
								seen[cls] = 1;
								classes[r.pages[i].title].push( cls );
							}
						}
					}
				}
				if ( r.pages[i].flagged ) {
					if ( r.pages[i].lastrevid !== r.pages[i].flagged.stable_revid ) {
						classes[r.pages[i].title].push( 'needs-review' );
					}
				}
			}
		}

		// wgPageName carries the namespace name in the same form as API titles (only with
		// underscores), unlike wgCanonicalNamespace, which is e.g. "Project" for "Wikipedia:".
		selfTitle = String( mw.config.get( 'wgPageName' ) ).replace( /_/g, ' ' );

		Array.prototype.forEach.call( alist, function ( a ) {
			var cls, m, i, j, pageCats, matchCats,
				$a = $( a );

			// Only links already tagged by classifyChildren() are handled.
			if ( a.wikipage === undefined ) {
				return;
			}
			if ( redir[a.wikipage] ) {
				$a.addClass( 'redirect' );
				a.wikipage = redir[a.wikipage];
				a.title = a.wikipage;
				if ( a.wikipage === selfTitle ) {
					$a.addClass( 'self-redirect' );
				}
				if ( missing[a.wikipage] ) {
					$a.addClass( 'broken-redirect' );
				}
			}
			m = a.href.match( /#.*/ );
			if ( m && m[0].slice( 0, 10 ) !== '#cite_note' ) {
				// Show the (decoded) section anchor in the tooltip.
				try {
					// Modern MediaWiki doesn't normally do the dot-encoding thing anymore, but humans/scripts sometimes still do.
					a.title = a.title.replace( /#.*/, '' ) + decodeURIComponent( m[0].replace( /_/g, ' ' ).replace( /\.([0-9A-F][0-9A-F])/g, '%$1' ) );
				} catch ( e ) {
					// Malformed UTF8? Decode it as bytes.
					a.title = a.title.replace( /#.*/, '' ) + m[0].replace( /_/g, ' ' ).replace( /\.([0-9A-F][0-9A-F])/g, function ( x, n ) {
						return String.fromCharCode( parseInt( n, 16 ) );
					} );
				}
			}
			if ( LinkClassifier.intentionaldab.test( a.origwikipage ) ) {
				$a.addClass( 'intentional-disambiguation' );
			}
			if ( classes[a.wikipage] ) {
				for ( j = classes[a.wikipage].length - 1; j >= 0; j-- ) {
					$a.addClass( classes[a.wikipage][j] );
				}
			}
			if ( a.wikipage !== a.origwikipage && classes[a.origwikipage] ) {
				for ( j = classes[a.origwikipage].length - 1; j >= 0; j-- ) {
					$a.addClass( classes[a.origwikipage][j] );
				}
			}

			// Categories of the target plus, for redirects, those of the redirect page itself.
			pageCats = [];
			if ( cats[a.wikipage] ) {
				pageCats = pageCats.concat( cats[a.wikipage] );
			}
			if ( a.wikipage !== a.origwikipage && cats[a.origwikipage] ) {
				pageCats = pageCats.concat( cats[a.origwikipage] );
			}
			if ( pageCats.length > 0 ) {
				pageCats = pageCats.sort();
				for ( cls in LinkClassifier.cats ) {
					i = pageCats.length - 1;
					matchCats = LinkClassifier.cats[cls];
					if ( matchCats instanceof RegExp ) {
						while ( i >= 0 ) {
							if ( matchCats.test( pageCats[i] ) ) {
								$a.addClass( cls );
								break;
							}
							i--;
						}
					} else {
						// Both lists are sorted: walk them from the end, always stepping the side
						// whose current entry sorts later, until an equal pair is found.
						j = matchCats.length - 1;
						while ( i >= 0 && j >= 0 ) {
							if ( pageCats[i] === matchCats[j] ) {
								$a.addClass( cls );
								break;
							}
							if ( pageCats[i] > matchCats[j] ) {
								--i;
							} else {
								--j;
							}
						}
					}
				}
			}
		} );
	},

	/* Success handler for the "Draft:" existence queries: adds the class 'has-draft' to every
	 * classified link for which a page "Draft:<title>" exists.
	 *
	 * Throws an Error when the response is unusable and no console is available, or when no content
	 * node can be found.
	 */
	draftsCallback: function ( r ) {
		var i, node, alist,
			// Prototype-less for the same reason as the maps in callback().
			found = Object.create( null );

		if ( !r || !r.query ) {
			if ( !window.console || typeof window.console.error !== 'function' ) {
				throw new Error( 'Bad response' );
			}
			window.console.error( 'Bad response', r );
			return;
		}
		r = r.query;

		node = document.getElementById( 'wikiPreview' );
		if ( !node ) {
			node = document.getElementById( 'bodyContent' );
		}
		if ( !node ) {
			throw new Error( 'Huh? No body content?' );
		}
		alist = node.getElementsByTagName( 'A' );
		if ( alist.length === 0 ) {
			return;
		}

		if ( r.pages ) {
			for ( i in r.pages ) {
				found[r.pages[i].title] = r.pages[i].missing === undefined;
			}
		}
		Array.prototype.forEach.call( alist, function ( a ) {
			if ( a.wikipage !== undefined && found['Draft:' + a.origwikipage] ) {
				$( a ).addClass( 'has-draft' );
			}
		} );
	},

	/* Extracts the page title from a wiki URL.
	 *
	 * Recognizes ".../wiki/<title>" and ".../w/index.php?...title=<title>". Underscores become
	 * spaces, and the legacy "Image:" / "Image talk:" prefixes are rewritten to "File:" /
	 * "File talk:". Returns '' for URLs in neither form, for Special: pages, and for titles
	 * whose percent-encoding cannot be decoded.
	 */
	getPageName: function ( url ) {
		var t, m = url.match( /\/wiki\/([^?#]+)/ );
		if ( !m ) {
			m = url.match( /\/w\/index\.php\?(?:.*&)?title=([^&#]+)/ );
		}
		if ( !m ) {
			return '';
		}
		try {
			t = decodeURIComponent( m[1] );
		} catch ( e ) {
			// A stray "%" in one href must not abort classification of every other link.
			return '';
		}
		t = t.replace( /_/g, ' ' );
		if ( t.slice( 0, 6 ) === 'Image:' ) {
			t = 'File:' + t.slice( 6 );
		}
		if ( t.slice( 0, 11 ) === 'Image talk:' ) {
			// Skip the whole 11-character prefix, not just the first six characters.
			t = 'File talk:' + t.slice( 11 );
		}
		if ( t.slice( 0, 8 ) === 'Special:' ) {
			t = '';
		}
		return t;
	},

	/* Tags every link below node with the page it points to (a.wikipage / a.origwikipage) and
	 * sends the API queries whose responses are handled by callback() and draftsCallback().
	 * Runs once the 'mediawiki.util' and 'mediawiki.user' modules are loaded.
	 */
	classifyChildren: function ( node ) {
		mw.loader.using( [ 'mediawiki.util', 'mediawiki.user' ], function () {
			var alist, titles, draftTitles, re, currentPage, props, i, k;

			LinkClassifier.wasRun = true;
			alist = node.getElementsByTagName( 'A' );
			if ( !alist.length ) {
				return;
			}
			currentPage = LinkClassifier.getPageName( location.href );
			// Sorted list of distinct non-empty titles. External/interwiki links and links to the
			// current page are tagged with an empty wikipage and left out of the queries.
			titles = Array.prototype.map.call( alist, function ( a ) {
				a.wikipage = '';
				if ( /(^|\s)(external|extiw)(\s|$)/.test( a.className ) ) {
					return '';
				}
				if ( !/(^|\s)(image|mw-file-description)(\s|$)/.test( a.className ) ) {
					// addClass() does not add the class again when the script is rerun.
					$( a ).addClass( 'nonimage' );
				}
				a.wikipage = LinkClassifier.getPageName( a.href );
				if ( a.wikipage === currentPage ) {
					a.wikipage = '';
				}
				a.origwikipage = a.wikipage;
				return a.wikipage;
			} ).sort().filter( function ( e, i, a ) {
				return e !== '' && ( i === 0 || a[i - 1] !== e );
			} );

			// Titles without any namespace prefix are also checked for a "Draft:" counterpart.
			re = [];
			for ( k in mw.config.get( 'wgNamespaceIds' ) ) {
				if ( k !== '' ) {
					// Namespace names are data, not patterns: escape regex metacharacters.
					re.push( mw.util.escapeRegExp( k.replace( /_/g, ' ' ) ) );
				}
			}
			re = new RegExp( '^(' + re.join( '|' ) + '):', 'i' );
			draftTitles = [];
			for ( i = titles.length - 1; i >= 0; i-- ) {
				if ( !re.test( titles[i] ) ) {
					draftTitles.push( 'Draft:' + titles[i] );
				}
			}

			props = [];
			for ( k in LinkClassifier.props ) {
				props.push( k );
			}

			// Sends the queries in batches of at most `limit` titles per request.
			function processLinks( limit ) {
				var q;
				while ( titles.length > 0 ) {
					q = {
						format: 'json',
						action: 'query',
						titles: titles.splice( 0, limit ).join( '|' ),
						prop: 'categories|pageprops|info|flagged',
						redirects: 1,
						cllimit: 'max',
						inprop: 'protection',
						rawcontinue: 1
					};
					// Restrict the page props only when the list fits the limit; otherwise ppprop
					// is omitted from the query.
					if ( props.length <= limit ) {
						q.ppprop = props.join( '|' );
					}
					$.ajax( {
						url: mw.util.wikiScript( 'api' ),
						dataType: 'json',
						type: 'POST',
						data: q,
						rawdata: q,
						success: LinkClassifier.callback,
						error: LinkClassifier.onAjaxError
					} );
				}

				while ( draftTitles.length > 0 ) {
					q = {
						format: 'json',
						action: 'query',
						titles: draftTitles.splice( 0, limit ).join( '|' ),
						rawcontinue: 1
					};
					$.ajax( {
						url: mw.util.wikiScript( 'api' ),
						dataType: 'json',
						type: 'POST',
						data: q,
						rawdata: q,
						success: LinkClassifier.draftsCallback,
						error: LinkClassifier.onAjaxError
					} );
				}
			}

			if ( titles.length <= 100 ) {
				// Not worth querying the API to see if the user has apihighlimits
				processLinks( 50 );
			} else {
				// Note mw.user.getRights queries the API
				mw.user.getRights( function ( rights ) {
					processLinks( ( rights.indexOf( 'apihighlimits' ) >= 0 ) ? 500 : 50 );
				} );
			}
		} );
	},

	/* Registers classifyChildren with the AJAXPreview script, if that script is present.
	 * Registration happens at most once, however often the load handlers fire.
	 */
	registerAjaxPreviewHook: function () {
		if ( window.AJAXPreview && !LinkClassifier.ajaxPreviewHooked ) {
			window.AJAXPreview.AddOnLoadHook( LinkClassifier.classifyChildren );
			LinkClassifier.ajaxPreviewHooked = true;
		}
	},

	/* Automatic entry point classifying the default content node. Does nothing in on-demand mode. */
	onLoad: function () {
		if ( window.LinkClassifierOnDemand ) {
			return;
		}
		LinkClassifier.registerAjaxPreviewHook();
		LinkClassifier.run( null );
	},

	/* Handler for the 'wikipage.content' hook: classifies the links inside each element of the
	 * jQuery collection $content. Does nothing in on-demand mode.
	 */
	onWikipageContent: function ( $content ) {
		if ( window.LinkClassifierOnDemand ) {
			return;
		}
		LinkClassifier.registerAjaxPreviewHook();
		$content.each( function ( i, n ) {
			LinkClassifier.run( n );
		} );
	},

	/* Entry point for on-demand use (e.g. from a portlet link): classifies the default content node. */
	onDemand: function () {
		LinkClassifier.run( null );
	},

	/* Fires the 'LinkClassifier' hook with this object, then classifies the links below node.
	 * Without a node, the preview area (#wikiPreview) is used if present, else #bodyContent.
	 */
	run: function ( node ) {
		mw.hook( 'LinkClassifier' ).fire( this );
		if ( !node ) {
			node = document.getElementById( 'wikiPreview' );
		}
		if ( !node ) {
			node = document.getElementById( 'bodyContent' );
		}
		if ( node ) {
			LinkClassifier.classifyChildren( node );
		}
	},

	/* Classifies again, but only if the classifier has already run at least once. */
	rerun: function () {
		if ( LinkClassifier.wasRun ) {
			LinkClassifier.onDemand();
		}
	}
};

/* Automatic mode: unless on-demand mode was requested via window.LinkClassifierOnDemand,
 * subscribe LinkClassifier.onWikipageContent to MediaWiki's 'wikipage.content' hook.
 */
( function () {
	var contentHook;

	if ( window.LinkClassifierOnDemand ) {
		return;
	}
	contentHook = mw.hook( 'wikipage.content' );
	contentHook.add( LinkClassifier.onWikipageContent );
}() );