MediaWiki REL1_34
LinksUpdateHookHandler.php
Go to the documentation of this file.
1<?php
2
4
6use Exception;
7use File;
9use Http;
10use LinksUpdate;
11use PageImages;
12use Title;
13use Revision;
14
23
/**
 * LinksUpdate hook handler, sets at most 2 page properties depending on images on page.
 *
 * Creates a fresh handler instance and lets it process the update.
 *
 * @param LinksUpdate $linksUpdate the LinksUpdate object that this hook was fired for
 */
public static function onLinksUpdate( LinksUpdate $linksUpdate ) {
	( new self() )->doLinksUpdate( $linksUpdate );
}
35
/**
 * Returns a list of page image candidates for consideration
 * for scoring algorithm.
 *
 * When $wgPageImagesLeadSectionOnly is set, only section 0 of the revision's
 * content is parsed; otherwise the parser output of the whole update is used.
 *
 * @param LinksUpdate $linksUpdate
 * @return array|null the 'pageImages' extension data of the parser output (null
 *  when the parser output carries none), or an empty array when no parser
 *  output could be obtained
 */
public function getPageImageCandidates( LinksUpdate $linksUpdate ) {
	global $wgPageImagesLeadSectionOnly;

	if ( !$wgPageImagesLeadSectionOnly ) {
		$parserOutput = $linksUpdate->getParserOutput();
		return $parserOutput ? $parserOutput->getExtensionData( 'pageImages' ) : [];
	}

	// Fall back to loading the revision ourselves; use READ_LATEST (T221763)
	$revision = $linksUpdate->getRevision()
		?: Revision::newFromTitle( $linksUpdate->getTitle(), 0, Revision::READ_LATEST );
	$content = $revision ? $revision->getContent() : null;

	// Certain content types e.g. AbstractContent return null if sections do not apply
	$leadSection = $content ? $content->getSection( 0 ) : null;
	if ( !$leadSection ) {
		return [];
	}

	$parserOutput = $leadSection->getParserOutput( $linksUpdate->getTitle() );
	return $parserOutput ? $parserOutput->getExtensionData( 'pageImages' ) : [];
}
71
/**
 * Scores every page image candidate and records the best ones as page properties.
 *
 * The best-scoring image that isImageFree() accepts is stored under the "free"
 * property name; the overall best image is stored under the other property name
 * only when it differs from the free one. Images need a score above zero to be
 * considered at all.
 *
 * @param LinksUpdate $linksUpdate the LinksUpdate object whose mProperties are updated
 */
public function doLinksUpdate( LinksUpdate $linksUpdate ) {
	$candidates = $this->getPageImageCandidates( $linksUpdate );
	if ( $candidates === null ) {
		return;
	}

	// Keep the highest score seen per file name; the running position feeds getScore().
	$bestScores = [];
	$position = 0;
	foreach ( $candidates as $candidate ) {
		$file = $candidate['filename'];
		$previous = $bestScores[$file] ?? -1;
		$bestScores[$file] = max( $previous, $this->getScore( $candidate, $position ) );
		$position++;
	}

	$bestAny = false;
	$bestFree = false;
	foreach ( $bestScores as $name => $score ) {
		if ( !( $score > 0 ) ) {
			continue;
		}

		if ( !$bestAny || $score > $bestScores[$bestAny] ) {
			$bestAny = $name;
		}

		// Only ask isImageFree() when this image would actually replace the current pick.
		$beatsCurrentFree = !$bestFree || $score > $bestScores[$bestFree];
		if ( $beatsCurrentFree && $this->isImageFree( $name ) ) {
			$bestFree = $name;
		}
	}

	if ( $bestFree ) {
		$linksUpdate->mProperties[PageImages::getPropName( true )] = $bestFree;
	}

	// Only store the image if it's not free. Free image (if any) has already been stored above.
	if ( $bestAny && $bestAny !== $bestFree ) {
		$linksUpdate->mProperties[PageImages::getPropName( false )] = $bestAny;
	}
}
118
/**
 * Returns score for image, the more the better, if it is less than zero,
 * the image shouldn't be used for anything.
 *
 * The score is the sum of a width score, an optional position bonus and a
 * ratio score, all looked up in $wgPageImagesScores. Blacklisted images
 * always get -1000.
 *
 * @param array $image Associative array describing an image
 * @param int $position Image order on page
 * @return float|int
 */
protected function getScore( array $image, $position ) {
	global $wgPageImagesScores;

	// Images with a 'handler' entry are standalone; everything else comes from a gallery.
	$total = isset( $image['handler'] )
		? $this->scoreFromTable( $image['handler']['width'], $wgPageImagesScores['width'] )
		: $this->scoreFromTable( $image['fullwidth'], $wgPageImagesScores['galleryImageWidth'] );

	$total += $wgPageImagesScores['position'][$position] ?? 0;

	// The ratio table is keyed by ratio * 10, truncated to an integer.
	$ratioTimesTen = intval( $this->getRatio( $image ) * 10 );
	$total += $this->scoreFromTable( $ratioTimesTen, $wgPageImagesScores['ratio'] );

	$blacklisted = $this->getBlacklist();

	return isset( $blacklisted[$image['filename']] ) ? -1000 : $total;
}
153
/**
 * Returns score based on table of ranges.
 *
 * The table maps upper boundaries to scores. The score of the smallest
 * boundary that is not below $value wins; when $value exceeds every boundary
 * the score of the largest boundary is used, and an empty table yields 0.
 *
 * @param int $value The number that the score will be based on.
 * @param float[] $scores Table of scores keyed by upper boundary.
 * @return float
 */
protected function scoreFromTable( $value, array $scores ) {
	// Walk the boundaries from smallest to largest so the first hit is the tightest one.
	ksort( $scores, SORT_NUMERIC );

	$picked = 0;
	foreach ( $scores as $boundary => $bucketScore ) {
		$picked = $bucketScore;

		if ( $boundary >= $value ) {
			break;
		}
	}

	if ( !is_numeric( $picked ) ) {
		wfLogWarning( 'The PageImagesScores setting must only contain numeric values!' );
	}

	return (float)$picked;
}
183
/**
 * Check whether image's copyright allows it to be used freely.
 *
 * A file that cannot be found is treated as free.
 *
 * @param string $fileName Name of the image file
 * @return bool
 */
protected function isImageFree( $fileName ) {
	$file = wfFindFile( $fileName );
	if ( !$file ) {
		return true;
	}

	// Process copyright metadata from CommonsMetadata, if present.
	// Image is considered free if the value is '0' or unset.
	$metadata = $this->fetchFileMetadata( $file );

	return empty( $metadata['NonFree']['value'] );
}
199
/**
 * Fetch the extended metadata of a file through FormatMetadata.
 *
 * @param File $file File to fetch metadata from
 * @return array whatever FormatMetadata::fetchExtendedMetadata() returns for the file
 */
protected function fetchFileMetadata( $file ) {
	$formatter = new FormatMetadata;
	$englishContext = new DerivativeContext( $formatter->getContext() );

	// The language is irrelevant here, and singleLanguage mode is slightly faster.
	$formatter->setSingleLanguage( true );

	// Pin the language to English so the cache is not split per language.
	$englishContext->setLanguage( 'en' );
	$formatter->setContext( $englishContext );

	return $formatter->fetchExtendedMetadata( $file );
}
216
/**
 * Returns width/height ratio of an image as displayed or 0 if not available.
 *
 * @param array $image Array representing the image to get the aspect ratio from
 * @return float|int
 */
protected function getRatio( array $image ) {
	$fullWidth = $image['fullwidth'];
	$fullHeight = $image['fullheight'];

	// A missing (falsy) dimension means the ratio is unknown.
	return ( $fullWidth && $fullHeight ) ? $fullWidth / $fullHeight : 0;
}
234
/**
 * Returns a list of images blacklisted from influencing this extension's output.
 *
 * The list is memoized per request in a function-static variable and shared
 * between requests through $wgMemc for $wgPageImagesBlacklistExpiry seconds.
 *
 * @throws Exception when a $wgPageImagesBlacklist entry has an unrecognized type
 * @return int[] Flipped associative array in format "image BDB key" => int
 */
protected function getBlacklist() {
	global $wgPageImagesBlacklist, $wgPageImagesBlacklistExpiry, $wgMemc;
	static $list = false;

	if ( $list !== false ) {
		return $list;
	}

	$key = wfMemcKey( 'pageimages', 'blacklist' );
	$cached = $wgMemc->get( $key );
	if ( $cached !== false ) {
		$list = $cached;
		return $list;
	}

	wfDebug( __METHOD__ . "(): cache miss\n" );

	// Collect into a local variable: the static must only ever hold a complete,
	// flipped list. Otherwise an exception thrown below would leave a partial
	// list behind that later calls in the same request would happily return.
	$names = [];

	foreach ( $wgPageImagesBlacklist as $source ) {
		switch ( $source['type'] ) {
			case 'db':
				$names = array_merge( $names, $this->getDbBlacklist( $source['db'], $source['page'] ) );
				break;
			case 'url':
				$names = array_merge( $names, $this->getUrlBlacklist( $source['url'] ) );
				break;
			default:
				throw new Exception(
					__METHOD__ . "(): unrecognized image blacklist type '{$source['type']}'" );
		}
	}

	// Flip so that membership can be tested with isset( $list[$name] ).
	$list = array_flip( $names );
	$wgMemc->set( $key, $list, $wgPageImagesBlacklistExpiry );

	return $list;
}
276
/**
 * Returns list of images linked by the given blacklist page.
 *
 * @param string|bool $dbName Database name or false for current database
 * @param string $page Name of the page whose file links form the blacklist
 * @return string[] Database keys (pl_title) of the linked files; empty when the
 *  page name is invalid or the page does not exist
 */
private function getDbBlacklist( $dbName, $page ) {
	// Title::newFromText() yields null for an invalid page name; bail out
	// instead of failing fatally on a misconfigured blacklist source.
	$title = Title::newFromText( $page );
	if ( !$title ) {
		wfLogWarning( __METHOD__ . "(): invalid blacklist page title '$page'" );
		return [];
	}

	$dbr = wfGetDB( DB_REPLICA, [], $dbName );
	$list = [];

	$id = $dbr->selectField(
		'page',
		'page_id',
		[ 'page_namespace' => $title->getNamespace(), 'page_title' => $title->getDBkey() ],
		__METHOD__
	);

	if ( $id ) {
		// Every link from the blacklist page into the File namespace is a blacklisted image.
		$res = $dbr->select( 'pagelinks',
			'pl_title',
			[ 'pl_from' => $id, 'pl_namespace' => NS_FILE ],
			__METHOD__
		);
		foreach ( $res as $row ) {
			$list[] = $row->pl_title;
		}
	}

	return $list;
}
310
/**
 * Returns list of images on given remote blacklist page.
 *
 * The page text is fetched over HTTP (3 second timeout) and scanned for
 * "[[:Name.ext" links whose extension is one of $wgFileExtensions.
 *
 * @param string $url URL of the blacklist page (raw wikitext is expected)
 * @return string[] Database keys of the files found; empty when the fetch
 *  fails or nothing matches
 */
private function getUrlBlacklist( $url ) {
	global $wgFileExtensions;

	$list = [];
	$text = Http::get( $url, [ 'timeout' => 3 ], __METHOD__ );

	// Escape each extension so that regex metacharacters (or the "/" delimiter)
	// in the configuration cannot break or alter the pattern.
	$extensions = array_map(
		static function ( $extension ) {
			return preg_quote( (string)$extension, '/' );
		},
		$wgFileExtensions
	);
	$regex = '/\[\[:([^|\#]*?\.(?:' . implode( '|', $extensions ) . '))/i';

	if ( $text && preg_match_all( $regex, $text, $matches ) ) {
		foreach ( $matches[1] as $s ) {
			// makeTitleSafe() returns null for names that are not valid titles; skip those.
			$t = Title::makeTitleSafe( NS_FILE, $s );

			if ( $t ) {
				$list[] = $t->getDBkey();
			}
		}
	}

	return $list;
}
339
340}
$wgFileExtensions
This is the list of preferred extensions for uploading files.
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
wfFindFile( $title, $options=[])
Find a file.
wfLogWarning( $msg, $callerOffset=1, $level=E_USER_WARNING)
Send a warning as a PHP error and the debug log.
wfMemcKey(... $args)
Make a cache key for the local wiki.
$wgMemc
Definition Setup.php:790
An IContextSource implementation which will inherit context from another source but allow individual ...
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition File.php:61
Format Image metadata values into a human readable form.
Various HTTP related functions.
Definition Http.php:29
static get( $url, array $options=[], $caller=__METHOD__)
Simple wrapper for Http::request( 'GET' )
Definition Http.php:64
Class that manages updates of *_link tables as well as similar extension-managed tables.
getTitle()
Return the title object of the page being updated.
getParserOutput()
Returns parser output.
Handler for the "LinksUpdate" hook.
getBlacklist()
Returns a list of images blacklisted from influencing this extension's output.
getDbBlacklist( $dbName, $page)
Returns list of images linked by the given blacklist page.
scoreFromTable( $value, array $scores)
Returns score based on table of ranges.
getUrlBlacklist( $url)
Returns list of images on given remote blacklist page.
static onLinksUpdate(LinksUpdate $linksUpdate)
LinksUpdate hook handler, sets at most 2 page properties depending on images on page.
getRatio(array $image)
Returns width/height ratio of an image as displayed or 0 if not available.
isImageFree( $fileName)
Check whether image's copyright allows it to be used freely.
getPageImageCandidates(LinksUpdate $linksUpdate)
Returns a list of page image candidates for consideration for scoring algorithm.
getScore(array $image, $position)
Returns score for image, the more the better, if it is less than zero, the image shouldn't be used fo...
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target.
Definition Revision.php:138
Represents a title within MediaWiki.
Definition Title.php:42
const NS_FILE
Definition Defines.php:75
$context
Definition load.php:45
$source
const DB_REPLICA
Definition defines.php:25
$content
Definition router.php:78
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition router.php:42