r20561 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r20560 | r20561 | r20562 >
Date:16:19, 19 March 2007
Author:raymond
Status:old
Tags:
Comment:
* (bug 8324) LinkSearch: search for all protocols defined in wgUrlProtocols incl. email
Make LinkFilter and GlobalFunctions more flexible
Modified paths:
  • /trunk/extensions/LinkSearch/LinkSearch.i18n.php (modified) (history)
  • /trunk/extensions/LinkSearch/LinkSearch.php (modified) (history)
  • /trunk/phase3/RELEASE-NOTES (modified) (history)
  • /trunk/phase3/includes/GlobalFunctions.php (modified) (history)
  • /trunk/phase3/includes/LinkFilter.php (modified) (history)

Diff [purge]

Index: trunk/phase3/includes/LinkFilter.php
@@ -50,7 +50,7 @@
5151 * @param $filterEntry String: domainparts
5252 * @param $prot String: protocol
5353 */
54 - function makeLike( $filterEntry , $prot = 'http' ) {
 54+ function makeLike( $filterEntry , $prot = 'http://' ) {
5555 if ( substr( $filterEntry, 0, 2 ) == '*.' ) {
5656 $subdomains = true;
5757 $filterEntry = substr( $filterEntry, 2 );
@@ -76,18 +76,32 @@
7777 $path = '/';
7878 $host = $filterEntry;
7979 }
80 - $host = strtolower( implode( '.', array_reverse( explode( '.', $host ) ) ) );
81 - if ( substr( $host, -1, 1 ) !== '.' ) {
82 - $host .= '.';
 80+ // Reverse the labels in the hostname, convert to lower case
 81+ // For emails reverse domainpart only
 82+ if ( $prot == 'mailto:' && strpos($host, '@') ) {
 83+ // complete email adress
 84+ $mailparts = explode( '@', $host );
 85+ $domainpart = strtolower( implode( '.', array_reverse( explode( '.', $mailparts[1] ) ) ) );
 86+ $host = $domainpart . '@' . $mailparts[0];
 87+ $like = "$prot$host%";
 88+ } elseif ( $prot == 'mailto:' ) {
 89+ // domainpart of email adress only. do not add '.'
 90+ $host = strtolower( implode( '.', array_reverse( explode( '.', $host ) ) ) );
 91+ $like = "$prot$host%";
 92+ } else {
 93+ $host = strtolower( implode( '.', array_reverse( explode( '.', $host ) ) ) );
 94+ if ( substr( $host, -1, 1 ) !== '.' ) {
 95+ $host .= '.';
 96+ }
 97+ $like = "$prot$host";
 98+
 99+ if ( $subdomains ) {
 100+ $like .= '%';
 101+ }
 102+ if ( !$subdomains || $path !== '/' ) {
 103+ $like .= $path . '%';
 104+ }
83105 }
84 - $like = "$prot://$host";
85 -
86 - if ( $subdomains ) {
87 - $like .= '%';
88 - }
89 - if ( !$subdomains || $path !== '/' ) {
90 - $like .= $path . '%';
91 - }
92106 return $like;
93107 }
94108 }
Index: trunk/phase3/includes/GlobalFunctions.php
@@ -1932,22 +1932,45 @@
19331933 * Make a URL index, appropriate for the el_index field of externallinks.
19341934 */
19351935 function wfMakeUrlIndex( $url ) {
 1936+ global $wgUrlProtocols; // Allow all protocols defined in DefaultSettings/LocalSettings.php
 1937+ $bits = parse_url( $url );
19361938 wfSuppressWarnings();
1937 - $bits = parse_url( $url );
1938 - $prots = array( 'http', 'https', 'ftp', 'irc', 'news' );
19391939 wfRestoreWarnings();
1940 - if ( !$bits || !in_array( $bits['scheme'], $prots ) ) {
 1940+ if ( !$bits ) {
19411941 return false;
19421942 }
 1943+ // most of the protocols are followed by ://, but mailto: and sometimes news: not, check for it
 1944+ $delimiter = '';
 1945+ if ( in_array( $bits['scheme'] . '://' , $wgUrlProtocols ) ) {
 1946+ $delimiter = '://';
 1947+ } elseif ( in_array( $bits['scheme'] .':' , $wgUrlProtocols ) ) {
 1948+ $delimiter = ':';
 1949+ // parse_url detects for news: and mailto: the host part of an url as path
 1950+ // We have to correct this wrong detection
 1951+ if ( isset ( $bits['path'] ) ) {
 1952+ $bits['host'] = $bits['path'];
 1953+ $bits['path'] = '';
 1954+ }
 1955+ } else {
 1956+ return false;
 1957+ }
 1958+
19431959 // Reverse the labels in the hostname, convert to lower case
1944 - $reversedHost = strtolower( implode( '.', array_reverse( explode( '.', $bits['host'] ) ) ) );
 1960+ // For emails reverse domainpart only
 1961+ if ( $bits['scheme'] == 'mailto' ) {
 1962+ $mailparts = explode( '@', $bits['host'] );
 1963+ $domainpart = strtolower( implode( '.', array_reverse( explode( '.', $mailparts[1] ) ) ) );
 1964+ $reversedHost = $domainpart . '@' . $mailparts[0];
 1965+ } else {
 1966+ $reversedHost = strtolower( implode( '.', array_reverse( explode( '.', $bits['host'] ) ) ) );
 1967+ }
19451968 // Add an extra dot to the end
19461969 if ( substr( $reversedHost, -1, 1 ) !== '.' ) {
19471970 $reversedHost .= '.';
19481971 }
19491972 // Reconstruct the pseudo-URL
19501973 $prot = $bits['scheme'];
1951 - $index = "$prot://$reversedHost";
 1974+ $index = "$prot$delimiter$reversedHost";
19521975 // Leave out user and password. Add the port, path, query and fragment
19531976 if ( isset( $bits['port'] ) ) $index .= ':' . $bits['port'];
19541977 if ( isset( $bits['path'] ) ) {
Index: trunk/phase3/RELEASE-NOTES
@@ -274,12 +274,11 @@
275275 * (bug 5546) Watchlist reflects logged actions like move, protection, undelete
276276 * (bug 9019) No warning during upload if image description page exists, but no
277277 image
 278+* Support protocols other than HTTP in LinkFilter, use $wgUrlProtocols
278279 * (bug 8582) Allow thumbnailing when imagesize has a space.
279280 * (bug 8716) Change math_inputhash and math_outputhash to byte for Postgres
280281 * (bug 8558) Correct display of timestamps on some pages when using Postgres
281282
282 -Support protocols other than HTTP in LinkFilter
283 -
284283 == Languages updated ==
285284
286285 * Arabic (ar)
Index: trunk/extensions/LinkSearch/LinkSearch.i18n.php
@@ -12,9 +12,7 @@
1313 'linksearch-pat' => 'Search pattern:',
1414 'linksearch-ns' => 'Namespace:',
1515 'linksearch-ok' => 'Search',
16 - 'linksearch-text' => 'Wildcards such as " *.wikipedia.org " may be used.
17 -
18 -The default protocol is http. Other possible protocols: https, ftp, irc, news.',
 16+ 'linksearch-text' => 'Wildcards such as " *.wikipedia.org " may be used.',
1917 'linksearch-line' => '$1 linked from $2',
2018 'linksearch-error' => 'Wildcards may appear only at the start of the hostname.',
2119 );
@@ -29,10 +27,7 @@
3028 'linksearch-pat' => 'Suchmuster:',
3129 'linksearch-ns' => 'Namensraum:',
3230 'linksearch-ok' => 'Suche',
33 - 'linksearch-text' => 'Diese Spezialseite ermöglicht die Suche nach Seiten, in denen bestimmte Weblinks enthalten sind. Dabei können Wildcards wie beispielsweise <tt>*.example.com</tt> benutzt werden.
34 -
35 - Standardmäßig wird nur nach http:// gesucht. Weitere unterstützte Protokolle: https, ftp, irc und news.',
36 - 'linksearch-line' => '$1 ist verlinkt von $2',
 31+ 'linksearch-text' => 'Diese Spezialseite ermöglicht die Suche nach Seiten, in denen bestimmte Weblinks enthalten sind. Dabei können Wildcards wie beispielsweise <tt>*.example.com</tt> benutzt werden.', 'linksearch-line' => '$1 ist verlinkt von $2',
3732 'linksearch-error' => 'Wildcards können nur am Anfang der URL verwendet werden.',
3833 );
3934 $wgLinkSearchMessages['fi'] = array(
@@ -55,9 +50,7 @@
5651 'linksearch-pat' => 'קישור לחיפוש:',
5752 'linksearch-ns' => 'מרחב שם:',
5853 'linksearch-ok' => 'חיפוש',
59 - 'linksearch-text' => 'ניתן להשתמש בתווים כללים, לדוגמה "‎*.wikipedia.org".
60 -
61 -פרוטוקול ברירת המחדל הוא HTTP. פרוטוקולים אפשריים אחרים: HTTPS,‏ FTP,‏ IRC,‏ NEWS.',
 54+ 'linksearch-text' => 'ניתן להשתמש בתווים כללים, לדוגמה "‎*.wikipedia.org"',
6255 'linksearch-line' => '$1 מקושר מהדף $2',
6356 'linksearch-error' => 'תווים כלליים יכולים להופיע רק בתחילת שם השרת.',
6457 );
Index: trunk/extensions/LinkSearch/LinkSearch.php
@@ -28,9 +28,10 @@
2929 /*function*/ false, /*file*/ false );
3030
3131 class LinkSearchPage extends QueryPage {
32 - function __construct( $query , $ns ) {
 32+ function __construct( $query , $ns , $prot ) {
3333 $this->mQuery = $query;
3434 $this->mNs = $ns;
 35+ $this->mProt = $prot;
3536 }
3637
3738 function getName() {
@@ -47,37 +48,20 @@
4849 /**
4950 * Return an appropriately formatted LIKE query
5051 */
51 - static function mungeQuery( $query ) {
52 - $prot = 'http'; // use http as standard
53 - if( substr( $query, 0, 7 ) == 'http://' ) {
54 - $query = substr( $query, 7 );
55 - } elseif( substr( $query, 0, 8 ) == 'https://' ) {
56 - $query = substr( $query, 8 );
57 - $prot = 'https';
58 - } elseif( substr( $query, 0, 6 ) == 'ftp://' ) {
59 - $query = substr( $query, 6 );
60 - $prot = 'ftp';
61 - } elseif( substr( $query, 0, 6 ) == 'irc://' ) {
62 - $query = substr( $query, 6 );
63 - $prot = 'irc';
64 - } elseif( substr( $query, 0, 7 ) == 'news://' ) {
65 - $query = substr( $query, 7 );
66 - $prot = 'news';
67 - }
 52+ static function mungeQuery( $query , $prot ) {
6853 return LinkFilter::makeLike( $query , $prot );
6954 }
7055
7156 function linkParameters() {
72 - return array( 'target' => $this->mQuery, 'namespace' => $this->mNs );
 57+ return array( 'target' => $this->mQuery, 'namespace' => $this->mNs , 'protocol' => $this->mProt );
7358 }
7459
7560 function getSQL() {
7661 $dbr = wfGetDB( DB_SLAVE );
7762 $page = $dbr->tableName( 'page' );
7863 $externallinks = $dbr->tableName( 'externallinks' );
79 - $encSearch = $dbr->addQuotes( self::mungeQuery( $this->mQuery ) );
 64+ $encSearch = $dbr->addQuotes( self::mungeQuery( $this->mQuery, $this->mProt ) );
8065 $encSQL = '';
81 -
8266 if ( isset ($this->mNs) ) $encSQL = 'AND page_namespace=' . $this->mNs;
8367 return
8468 "SELECT
@@ -108,7 +92,7 @@
10993 */
11094 function doQuery( $offset, $limit ) {
11195 global $wgOut;
112 - $this->mMungedQuery = LinkSearchPage::mungeQuery( $this->mQuery );
 96+ $this->mMungedQuery = LinkSearchPage::mungeQuery( $this->mQuery, $this->mProt );
11397 if( $this->mMungedQuery === false ) {
11498 $wgOut->addWikiText( wfMsg( 'linksearch-error' ) );
11599 } else {
@@ -131,27 +115,36 @@
132116
133117 function wfSpecialLinksearch( $par=null, $ns=null ) {
134118 list( $limit, $offset ) = wfCheckLimits();
135 - global $wgOut, $wgRequest;
 119+ global $wgOut, $wgRequest, $wgUrlProtocols;
136120 $target = $GLOBALS['wgRequest']->getVal( 'target', $par );
137121 $namespace = $GLOBALS['wgRequest']->getIntorNull( 'namespace', $ns );
 122+ $protocol = $GLOBALS['wgRequest']->getVal( 'protocol', $prot );
138123 $self = Title::makeTitle( NS_SPECIAL, 'Linksearch' );
139124
140 -
141 - $wgOut->addWikiText( wfMsg( 'linksearch-text' ) );
142 - $wgOut->addHtml(
143 - Xml::openElement( 'form', array( 'method' => 'get', 'action' => $GLOBALS['wgScript'] ) ) .
 125+ $wgOut->addWikiText( wfMsg( 'linksearch-text', '<nowiki>' . implode( ', ', $wgUrlProtocols) . '</nowiki>' ) );
 126+ $s = Xml::openElement( 'form', array( 'method' => 'get', 'action' => $GLOBALS['wgScript'] ) ) .
144127 Xml::hidden( 'title', $self->getPrefixedDbKey() ) .
145128 '<fieldset>' .
146129 Xml::element( 'legend', array(), wfMsg( 'linksearch' ) ) .
147 - Xml::inputLabel( wfMsg( 'linksearch-pat' ), 'target', 'target', 50 , $target ) . '<br />' .
 130+ Xml::label( wfMsg( 'linksearch-pat' ), 'target' ) . ' ' .
 131+ "<select id='protocol' name='protocol' class='protocolselector'>";
 132+ foreach( $wgUrlProtocols as $prot ) {
 133+ if ( $prot == $protocol ) {
 134+ $s .= Xml::option( $prot, $prot, true );
 135+ } else {
 136+ $s .= Xml::option( $prot, $prot );
 137+ }
 138+ }
 139+ $s .= Xml::input( 'target', 50 , $target ) . '<br />' .
148140 Xml::label( wfMsg( 'linksearch-ns' ), 'namespace' ) .
149141 XML::namespaceSelector( $namespace, '' ) .
150142 Xml::submitButton( wfMsg( 'linksearch-ok' ) ) .
151143 '</fieldset>' .
152 - Xml::closeElement( 'form' ) );
 144+ Xml::closeElement( 'form' );
 145+ $wgOut->addHtml( $s );
153146
154147 if( $target != '' ) {
155 - $searcher = new LinkSearchPage( $target, $namespace );
 148+ $searcher = new LinkSearchPage( $target, $namespace, $protocol );
156149 $searcher->doQuery( $offset, $limit );
157150 }
158151 }

Past revisions this follows-up on

Revision | Commit summary | Author | Date
r20530 | * (bug 8324) LinkSearch: search for https/ftp/irc/news weblinks... | raymond | 21:49, 16 March 2007