MediaWiki  1.34.0
LinksUpdateHookHandler.php
Go to the documentation of this file.
1 <?php
2 
3 namespace PageImages\Hooks;
4 
6 use Exception;
7 use File;
9 use Http;
10 use LinksUpdate;
11 use PageImages;
12 use Title;
13 use Revision;
14 
23 
/**
 * LinksUpdate hook entry point.
 *
 * Creates a handler instance and delegates the whole update to
 * doLinksUpdate().
 *
 * @param LinksUpdate $linksUpdate the update currently being processed
 */
public static function onLinksUpdate( LinksUpdate $linksUpdate ) {
	( new self() )->doLinksUpdate( $linksUpdate );
}
35 
/**
 * Collects the 'pageImages' extension data that scoring should consider.
 *
 * When $wgPageImagesLeadSectionOnly is set, only section 0 of the page
 * content is parsed and inspected; otherwise the parser output already
 * attached to the LinksUpdate is used.
 *
 * @param LinksUpdate $linksUpdate the update currently being processed
 * @return array|null whatever getExtensionData( 'pageImages' ) yields, or an
 *  empty array when no parser output could be obtained. doLinksUpdate()
 *  treats a null result as "nothing to do".
 */
public function getPageImageCandidates( LinksUpdate $linksUpdate ) {
	global $wgPageImagesLeadSectionOnly;

	if ( !$wgPageImagesLeadSectionOnly ) {
		$parserOutput = $linksUpdate->getParserOutput();

		return $parserOutput ? $parserOutput->getExtensionData( 'pageImages' ) : [];
	}

	// Use READ_LATEST (T221763) when the update carries no revision of its own.
	$revision = $linksUpdate->getRevision()
		?: Revision::newFromTitle( $linksUpdate->getTitle(), 0, Revision::READ_LATEST );
	if ( !$revision ) {
		return [];
	}

	$content = $revision->getContent();
	if ( !$content ) {
		return [];
	}

	// Certain content types e.g. AbstractContent return null if sections do not apply
	$leadSection = $content->getSection( 0 );
	if ( !$leadSection ) {
		return [];
	}

	$parserOutput = $leadSection->getParserOutput( $linksUpdate->getTitle() );

	return $parserOutput ? $parserOutput->getExtensionData( 'pageImages' ) : [];
}
71 
/**
 * Scores every candidate image of the page and records the winners as page
 * properties on the LinksUpdate.
 *
 * At most two properties are written: one for the best free image and, if
 * the overall best image is a different one, one for that image as well.
 *
 * @param LinksUpdate $linksUpdate the update currently being processed
 */
public function doLinksUpdate( LinksUpdate $linksUpdate ) {
	$candidates = $this->getPageImageCandidates( $linksUpdate );
	if ( $candidates === null ) {
		return;
	}

	// Best score seen per file name; a file may occur several times on a page.
	$bestScores = [];
	$position = 0;
	foreach ( $candidates as $candidate ) {
		$file = $candidate['filename'];
		$known = isset( $bestScores[$file] ) ? $bestScores[$file] : -1;
		$bestScores[$file] = max( $known, $this->getScore( $candidate, $position++ ) );
	}

	$bestAny = false;
	$bestFree = false;
	foreach ( $bestScores as $file => $score ) {
		// Only strictly positive scores are eligible.
		if ( !( $score > 0 ) ) {
			continue;
		}

		if ( !$bestAny || $score > $bestScores[$bestAny] ) {
			$bestAny = $file;
		}

		// The licence lookup is only performed when the score would actually win.
		$beatsCurrentFree = !$bestFree || $score > $bestScores[$bestFree];
		if ( $beatsCurrentFree && $this->isImageFree( $file ) ) {
			$bestFree = $file;
		}
	}

	if ( $bestFree ) {
		$linksUpdate->mProperties[PageImages::getPropName( true )] = $bestFree;
	}

	// Only store the image if it's not free. Free image (if any) has already been stored above.
	if ( $bestAny && $bestAny !== $bestFree ) {
		$linksUpdate->mProperties[PageImages::getPropName( false )] = $bestAny;
	}
}
118 
/**
 * Computes the score of one candidate image; higher is better.
 *
 * The score is the sum of a width score, an optional position bonus and a
 * width/height ratio score, all looked up in $wgPageImagesScores.
 * Blacklisted files always get -1000.
 *
 * @param array $image candidate data; reads 'filename', 'fullwidth',
 *  'fullheight' and, for standalone images, 'handler'['width']
 * @param int $position index of the candidate within the page's candidate list
 * @return float|int
 */
protected function getScore( array $image, $position ) {
	global $wgPageImagesScores;

	// A 'handler' entry marks a standalone image; otherwise it came from a gallery.
	$score = isset( $image['handler'] )
		? $this->scoreFromTable( $image['handler']['width'], $wgPageImagesScores['width'] )
		: $this->scoreFromTable( $image['fullwidth'], $wgPageImagesScores['galleryImageWidth'] );

	$positionScores = $wgPageImagesScores['position'];
	if ( isset( $positionScores[$position] ) ) {
		$score += $positionScores[$position];
	}

	// The ratio table is keyed by ratio * 10, truncated to an integer.
	$scaledRatio = intval( $this->getRatio( $image ) * 10 );
	$score += $this->scoreFromTable( $scaledRatio, $wgPageImagesScores['ratio'] );

	$blacklisted = $this->getBlacklist();

	return isset( $blacklisted[$image['filename']] ) ? -1000 : $score;
}
153 
/**
 * Looks up a score in a table of ranges.
 *
 * The table maps upper boundaries to scores. The score of the smallest
 * boundary that is >= $value is returned; if $value exceeds every boundary
 * the score of the largest boundary is used, and an empty table yields 0.
 *
 * @param int $value the value to classify
 * @param array $scores table of [ upper boundary => score ]
 * @return float
 */
protected function scoreFromTable( $value, array $scores ) {
	// The lookup stops at the first boundary that fits, so the boundaries
	// have to be visited in increasing order.
	ksort( $scores, SORT_NUMERIC );

	$picked = 0;
	foreach ( $scores as $boundary => $candidate ) {
		$picked = $candidate;

		if ( $boundary >= $value ) {
			break;
		}
	}

	if ( !is_numeric( $picked ) ) {
		wfLogWarning( 'The PageImagesScores setting must only contain numeric values!' );
	}

	return (float)$picked;
}
183 
/**
 * Checks whether an image's copyright allows it to be used freely.
 *
 * @param string $fileName name of the image file
 * @return bool true when the file cannot be found, or when its extended
 *  metadata has no non-empty 'NonFree' value
 */
protected function isImageFree( $fileName ) {
	$file = wfFindFile( $fileName );
	if ( !$file ) {
		return true;
	}

	// Process copyright metadata from CommonsMetadata, if present.
	// Image is considered free if the value is '0' or unset.
	$metadata = $this->fetchFileMetadata( $file );

	return empty( $metadata['NonFree']['value'] );
}
199 
/**
 * Fetches the extended metadata of a file.
 *
 * The core classes are referenced fully qualified: this code lives in the
 * PageImages\Hooks namespace, so an unqualified name that is not imported
 * would be resolved inside that namespace and fail to load.
 *
 * @param File $file file to inspect
 * @return array result of FormatMetadata::fetchExtendedMetadata()
 */
protected function fetchFileMetadata( $file ) {
	$format = new \FormatMetadata();
	$context = new \DerivativeContext( $format->getContext() );
	// we don't care about the language, and specifying singleLanguage is slightly faster
	$format->setSingleLanguage( true );
	// we don't care about the language, so avoid splitting the cache by selecting English
	$context->setLanguage( 'en' );
	$format->setContext( $context );

	return $format->fetchExtendedMetadata( $file );
}
216 
/**
 * Returns the width/height ratio of an image, or 0 when either dimension
 * is missing or zero.
 *
 * @param array $image candidate data; reads 'fullwidth' and 'fullheight'
 * @return float|int
 */
protected function getRatio( array $image ) {
	$fullWidth = $image['fullwidth'];
	$fullHeight = $image['fullheight'];

	return ( $fullWidth && $fullHeight ) ? $fullWidth / $fullHeight : 0;
}
234 
/**
 * Returns the images blacklisted from influencing this extension's output.
 *
 * The list is looked up in three stages: a per-process static, the shared
 * object cache ($wgMemc), and finally a rebuild from every source listed in
 * $wgPageImagesBlacklist. A rebuilt list is stored in the cache for
 * $wgPageImagesBlacklistExpiry.
 *
 * @return array flipped list, i.e. [ file name => index ], so that callers
 *  can test membership with isset()
 * @throws Exception when a configured source has an unrecognized 'type'
 */
protected function getBlacklist() {
	global $wgPageImagesBlacklist, $wgPageImagesBlacklistExpiry, $wgMemc;
	static $list = false;

	if ( $list !== false ) {
		return $list;
	}

	$key = wfMemcKey( 'pageimages', 'blacklist' );
	$cached = $wgMemc->get( $key );
	if ( $cached !== false ) {
		$list = $cached;
		return $list;
	}

	wfDebug( __METHOD__ . "(): cache miss\n" );

	// Accumulate into a local variable and publish to the static only once
	// the list is complete. If a source throws half-way through, the static
	// must stay false; otherwise later calls in the same process would
	// return a partial, un-flipped list as if it were the real blacklist.
	$names = [];

	foreach ( $wgPageImagesBlacklist as $source ) {
		switch ( $source['type'] ) {
			case 'db':
				$names = array_merge( $names, $this->getDbBlacklist( $source['db'], $source['page'] ) );
				break;
			case 'url':
				$names = array_merge( $names, $this->getUrlBlacklist( $source['url'] ) );
				break;
			default:
				throw new Exception(
					__METHOD__ . "(): unrecognized image blacklist type '{$source['type']}'" );
		}
	}

	$list = array_flip( $names );
	$wgMemc->set( $key, $list, $wgPageImagesBlacklistExpiry );

	return $list;
}
276 
/**
 * Returns the images linked from the given blacklist page.
 *
 * @param string|bool $dbName database to query, passed to wfGetDB() as the wiki
 * @param string $page title text of the blacklist page
 * @return string[] values of pl_title for the page's links into NS_FILE;
 *  empty when the title is invalid or the page does not exist
 */
private function getDbBlacklist( $dbName, $page ) {
	$title = Title::newFromText( $page );
	if ( !$title ) {
		// Title::newFromText() yields no object for invalid title text.
		// Treat a misconfigured page name as an empty blacklist rather than
		// failing on a method call on null.
		wfDebug( __METHOD__ . "(): invalid blacklist page title\n" );
		return [];
	}

	$dbr = wfGetDB( DB_REPLICA, [], $dbName );
	$list = [];

	$id = $dbr->selectField(
		'page',
		'page_id',
		[ 'page_namespace' => $title->getNamespace(), 'page_title' => $title->getDBkey() ],
		__METHOD__
	);

	if ( $id ) {
		$res = $dbr->select( 'pagelinks',
			'pl_title',
			[ 'pl_from' => $id, 'pl_namespace' => NS_FILE ],
			__METHOD__
		);
		foreach ( $res as $row ) {
			$list[] = $row->pl_title;
		}
	}

	return $list;
}
310 
/**
 * Returns the images linked on a remote blacklist page.
 *
 * The page is fetched over HTTP with a 3 second timeout and scanned for
 * links of the form [[:Name.ext, where ext is one of $wgFileExtensions.
 *
 * @param string $url address of the blacklist page
 * @return string[] DB keys of the linked files; empty when the fetch yields
 *  nothing or no link matches
 */
private function getUrlBlacklist( $url ) {
	global $wgFileExtensions;

	$list = [];
	$text = Http::get( $url, [ 'timeout' => 3 ], __METHOD__ );

	// Quote the configured extensions so they cannot alter the pattern.
	$extensions = array_map(
		function ( $extension ) {
			return preg_quote( $extension, '/' );
		},
		$wgFileExtensions
	);
	$regex = '/\[\[:([^|\#]*?\.(?:' . implode( '|', $extensions ) . '))/i';

	if ( $text && preg_match_all( $regex, $text, $matches ) ) {
		foreach ( $matches[1] as $s ) {
			// Normalize the matched name into a File-namespace title. Without
			// this assignment $t is undefined and nothing is ever collected.
			$t = Title::makeTitleSafe( NS_FILE, $s );

			if ( $t ) {
				$list[] = $t->getDBkey();
			}
		}
	}

	return $list;
}
339 
340 }
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:316
PageImages\Hooks\LinksUpdateHookHandler\getRatio
getRatio(array $image)
Returns width/height ratio of an image as displayed or 0 if not available.
Definition: LinksUpdateHookHandler.php:224
PageImages\Hooks\LinksUpdateHookHandler\fetchFileMetadata
fetchFileMetadata( $file)
Fetch file metadata.
Definition: LinksUpdateHookHandler.php:206
LinksUpdate
Class that manages updates of *_link tables as well as similar extension-managed tables.
Definition: LinksUpdate.php:35
$wgFileExtensions
$wgFileExtensions
This is the list of preferred extensions for uploading files.
Definition: DefaultSettings.php:942
NS_FILE
const NS_FILE
Definition: Defines.php:66
$file
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42
$s
$s
Definition: mergeMessageFileList.php:185
wfLogWarning
wfLogWarning( $msg, $callerOffset=1, $level=E_USER_WARNING)
Send a warning as a PHP error and the debug log.
Definition: GlobalFunctions.php:1078
PageImages\Hooks\LinksUpdateHookHandler\doLinksUpdate
doLinksUpdate(LinksUpdate $linksUpdate)
Definition: LinksUpdateHookHandler.php:75
$res
$res
Definition: testCompression.php:52
PageImages\Hooks\LinksUpdateHookHandler\getDbBlacklist
getDbBlacklist( $dbName, $page)
Returns list of images linked by the given blacklist page.
Definition: LinksUpdateHookHandler.php:285
PageImages
$dbr
$dbr
Definition: testCompression.php:50
$wgMemc
$wgMemc
Definition: Setup.php:791
Revision
Definition: Revision.php:40
Revision\newFromTitle
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target.
Definition: Revision.php:138
PageImages\Hooks\LinksUpdateHookHandler\onLinksUpdate
static onLinksUpdate(LinksUpdate $linksUpdate)
LinksUpdate hook handler, sets at most 2 page properties depending on images on page.
Definition: LinksUpdateHookHandler.php:31
DerivativeContext
An IContextSource implementation which will inherit context from another source but allow individual ...
Definition: DerivativeContext.php:30
File
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition: File.php:61
PageImages\Hooks\LinksUpdateHookHandler\getBlacklist
getBlacklist()
Returns a list of images blacklisted from influencing this extension's output.
Definition: LinksUpdateHookHandler.php:241
wfGetDB
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
Definition: GlobalFunctions.php:2575
$matches
$matches
Definition: NoLocalSettings.php:24
Http\get
static get( $url, array $options=[], $caller=__METHOD__)
Simple wrapper for Http::request( 'GET' )
Definition: Http.php:64
PageImages\Hooks
Definition: LinksUpdateHookHandler.php:3
$t
$t
Definition: make-normalization-table.php:143
$title
$title
Definition: testCompression.php:34
PageImages\Hooks\LinksUpdateHookHandler\getPageImageCandidates
getPageImageCandidates(LinksUpdate $linksUpdate)
Returns a list of page image candidates for consideration for scoring algorithm.
Definition: LinksUpdateHookHandler.php:43
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
LinksUpdate\getTitle
getTitle()
Return the title object of the page being updated.
Definition: LinksUpdate.php:1004
wfDebug
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:913
PageImages\Hooks\LinksUpdateHookHandler\scoreFromTable
scoreFromTable( $value, array $scores)
Returns score based on table of ranges.
Definition: LinksUpdateHookHandler.php:163
PageImages\Hooks\LinksUpdateHookHandler
Handler for the "LinksUpdate" hook.
Definition: LinksUpdateHookHandler.php:22
LinksUpdate\getParserOutput
getParserOutput()
Returns parser output.
Definition: LinksUpdate.php:1013
Title\makeTitleSafe
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:613
$content
$content
Definition: router.php:78
PageImages\Hooks\LinksUpdateHookHandler\getUrlBlacklist
getUrlBlacklist( $url)
Returns list of images on given remote blacklist page.
Definition: LinksUpdateHookHandler.php:320
PageImages\Hooks\LinksUpdateHookHandler\isImageFree
isImageFree( $fileName)
Check whether image's copyright allows it to be used freely.
Definition: LinksUpdateHookHandler.php:190
wfFindFile
wfFindFile( $title, $options=[])
Find a file.
Definition: GlobalFunctions.php:2604
$context
$context
Definition: load.php:45
FormatMetadata
Format Image metadata values into a human readable form.
Definition: FormatMetadata.php:51
Title
Represents a title within MediaWiki.
Definition: Title.php:42
PageImages\getPropName
static getPropName( $isFree)
Get property name used in page_props table.
Definition: PageImages.php:38
PageImages\Hooks\LinksUpdateHookHandler\getScore
getScore(array $image, $position)
Returns score for image, the more the better, if it is less than zero, the image shouldn't be used fo...
Definition: LinksUpdateHookHandler.php:128
$source
$source
Definition: mwdoc-filter.php:34
wfMemcKey
wfMemcKey(... $args)
Make a cache key for the local wiki.
Definition: GlobalFunctions.php:2499
LinksUpdate\getRevision
getRevision()
Definition: LinksUpdate.php:1040
Http
Various HTTP related functions.
Definition: Http.php:29