Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
44.92% covered (danger)
44.92%
177 / 394
28.00% covered (danger)
28.00%
7 / 25
CRAP
0.00% covered (danger)
0.00%
0 / 1
WikibaseMediaInfoHooks
44.92% covered (danger)
44.92%
177 / 394
28.00% covered (danger)
28.00%
7 / 25
1205.36
0.00% covered (danger)
0.00%
0 / 1
 onWikibaseRepoEntityNamespaces
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 onWikibaseEntityTypes
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
1
 onParserOutputPostCacheTransform
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
1
 onRegistration
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
6
 isMediaInfoPage
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
3
 onBeforePageDisplay
79.41% covered (warning)
79.41%
27 / 34
0.00% covered (danger)
0.00%
0 / 1
7.43
 doBeforePageDisplay
75.93% covered (warning)
75.93%
41 / 54
0.00% covered (danger)
0.00%
0 / 1
14.01
 generateWbTermsLanguages
0.00% covered (danger)
0.00%
0 / 5
0.00% covered (danger)
0.00%
0 / 1
2
 generateWbMonolingualTextLanguages
0.00% covered (danger)
0.00%
0 / 5
0.00% covered (danger)
0.00%
0 / 1
2
 tabifyStructuredData
24.39% covered (danger)
24.39%
20 / 82
0.00% covered (danger)
0.00%
0 / 1
21.56
 extractStructuredDataHtml
64.71% covered (warning)
64.71%
11 / 17
0.00% covered (danger)
0.00%
0 / 1
2.18
 createEmptyStructuredData
100.00% covered (success)
100.00%
17 / 17
100.00% covered (success)
100.00%
1 / 1
1
 deleteMediaInfoData
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
2
 getProtectionMsg
100.00% covered (success)
100.00%
17 / 17
100.00% covered (success)
100.00%
1 / 1
6
 onGetEntityByLinkedTitleLookup
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 onGetEntityContentModelForTitle
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
12
 onCirrusSearchProfileService
93.75% covered (success)
93.75%
30 / 32
0.00% covered (danger)
0.00%
0 / 1
6.01
 onCirrusSearchRegisterFullTextQueryClassifiers
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
6
 onGetPreferences
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
2
 onScribuntoExternalLibraries
0.00% covered (danger)
0.00%
0 / 9
0.00% covered (danger)
0.00%
0 / 1
20
 onRevisionUndeleted
0.00% covered (danger)
0.00%
0 / 55
0.00% covered (danger)
0.00%
0 / 1
90
 onArticleUndelete
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
12
 onSidebarBeforeOutput
0.00% covered (danger)
0.00%
0 / 14
0.00% covered (danger)
0.00%
0 / 1
12
 onCirrusSearchAddQueryFeatures
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
6
 onMultiContentSave
0.00% covered (danger)
0.00%
0 / 7
0.00% covered (danger)
0.00%
0 / 1
12
1<?php
2
3namespace Wikibase\MediaInfo;
4
5use CirrusSearch\Parser\ParsedQueryClassifiersRepository;
6use CirrusSearch\Profile\SearchProfileService;
7use MediaWiki\CommentStore\CommentStoreComment;
8use MediaWiki\Config\ConfigException;
9use MediaWiki\Context\RequestContext;
10use MediaWiki\Hook\ParserOutputPostCacheTransformHook;
11use MediaWiki\Hook\SidebarBeforeOutputHook;
12use MediaWiki\Html\Html;
13use MediaWiki\MediaWikiServices;
14use MediaWiki\Output\Hook\BeforePageDisplayHook;
15use MediaWiki\Output\OutputPage;
16use MediaWiki\Page\Hook\ArticleUndeleteHook;
17use MediaWiki\Page\Hook\RevisionUndeletedHook;
18use MediaWiki\Parser\ParserOutput;
19use MediaWiki\Preferences\Hook\GetPreferencesHook;
20use MediaWiki\Registration\ExtensionDependencyError;
21use MediaWiki\Registration\ExtensionRegistry;
22use MediaWiki\Revision\RenderedRevision;
23use MediaWiki\Revision\RevisionRecord;
24use MediaWiki\Revision\SlotRecord;
25use MediaWiki\Status\Status;
26use MediaWiki\Storage\BlobStore;
27use MediaWiki\Storage\Hook\MultiContentSaveHook;
28use MediaWiki\Title\Title;
29use MediaWiki\User\TempUser\TempUserConfig;
30use MediaWiki\User\User;
31use MediaWiki\User\UserIdentity;
32use OOUI\HtmlSnippet;
33use OOUI\IndexLayout;
34use OOUI\PanelLayout;
35use OOUI\TabPanelLayout;
36use Skin;
37use Wikibase\Client\WikibaseClient;
38use Wikibase\DataModel\Entity\NumericPropertyId;
39use Wikibase\DataModel\Services\Lookup\PropertyDataTypeLookupException;
40use Wikibase\DataModel\Statement\StatementGuid;
41use Wikibase\Lib\LanguageFallbackChainFactory;
42use Wikibase\Lib\Store\EntityByLinkedTitleLookup;
43use Wikibase\Lib\UserLanguageLookup;
44use Wikibase\MediaInfo\Content\MediaInfoContent;
45use Wikibase\MediaInfo\DataAccess\Scribunto\WikibaseMediaInfoEntityLibrary;
46use Wikibase\MediaInfo\DataAccess\Scribunto\WikibaseMediaInfoLibrary;
47use Wikibase\MediaInfo\DataModel\MediaInfo;
48use Wikibase\MediaInfo\Search\Feature\CustomMatchFeature;
49use Wikibase\MediaInfo\Search\MediaSearchASTClassifier;
50use Wikibase\MediaInfo\Search\MediaSearchQueryBuilder;
51use Wikibase\MediaInfo\Services\MediaInfoByLinkedTitleLookup;
52use Wikibase\MediaInfo\Services\MediaInfoServices;
53use Wikibase\Repo\BabelUserLanguageLookup;
54use Wikibase\Repo\Content\EntityInstanceHolder;
55use Wikibase\Repo\MediaWikiLocalizedTextProvider;
56use Wikibase\Repo\ParserOutput\DispatchingEntityViewFactory;
57use Wikibase\Repo\WikibaseRepo;
58
59/**
60 * MediaWiki hook handlers for the Wikibase MediaInfo extension.
61 *
62 * @license GPL-2.0-or-later
63 * @author Bene* < benestar.wikimedia@gmail.com >
64 */
65class WikibaseMediaInfoHooks implements
66    BeforePageDisplayHook,
67    ParserOutputPostCacheTransformHook,
68    GetPreferencesHook,
69    RevisionUndeletedHook,
70    ArticleUndeleteHook,
71    SidebarBeforeOutputHook,
72    MultiContentSaveHook
73{
74
75    public const MEDIAINFO_SLOT_HEADER_PLACEHOLDER = '<mediainfoslotheader />';
76
/**
 * Hook handler that registers where MediaInfo entity content lives, for use by
 * Wikibase's EntityNamespaceLookup.
 *
 * The entry keyed by the MediaInfo entity type is set to the file namespace ID
 * and the entity type, joined by a slash.
 *
 * @param (int|string)[] &$entityNamespacesSetting Map of entity type => namespace setting
 */
public static function onWikibaseRepoEntityNamespaces( &$entityNamespacesSetting ) {
    $entityType = MediaInfo::ENTITY_TYPE;

    // Tell Wikibase where to put our entity content.
    $entityNamespacesSetting[$entityType] = NS_FILE . '/' . $entityType;
}
86
/**
 * Hook handler that merges the MediaInfo entity type definition into the
 * definitions array Wikibase uses.
 *
 * The definition itself is loaded from WikibaseMediaInfo.entitytypes.php in the
 * parent directory of this file.
 *
 * @see WikibaseMediaInfo.entitytypes.php
 *
 * @note This is bootstrap code, it is executed for EVERY request. Avoid instantiating
 * objects or loading classes here!
 *
 * @param array[] &$entityTypeDefinitions Existing definitions; extended in place
 */
public static function onWikibaseEntityTypes( array &$entityTypeDefinitions ) {
    $mediaInfoDefinitions = require __DIR__ . '/../WikibaseMediaInfo.entitytypes.php';

    $entityTypeDefinitions = array_merge( $entityTypeDefinitions, $mediaInfoDefinitions );
}
103
/**
 * Swaps the default mediainfo slot header for our own placeholder.
 *
 * By default the mw:slotheader placeholder is replaced with the name of the slot.
 * We want a localised message instead, but RequestContext (and therefore the
 * language) is not available here, so the text is only re-marked with
 * MEDIAINFO_SLOT_HEADER_PLACEHOLDER; the real replacement happens later on in
 * onBeforePageDisplay().
 *
 * Won't be necessary when T205444 is done
 *
 * @see https://phabricator.wikimedia.org/T205444
 * @see onBeforePageDisplay()
 *
 * @param ParserOutput $parserOutput
 * @param string &$text Output text; modified in place
 * @param array &$options
 */
public function onParserOutputPostCacheTransform(
    $parserOutput,
    &$text,
    &$options
): void {
    $defaultSlotHeader = '<mw:slotheader>mediainfo</mw:slotheader>';

    $text = str_replace( $defaultSlotHeader, self::MEDIAINFO_SLOT_HEADER_PLACEHOLDER, $text );
}
131
/**
 * Registration callback that aborts with a dependency error unless the
 * 'WikibaseRepository' extension is loaded.
 *
 * @throws ExtensionDependencyError when 'WikibaseRepository' is not loaded
 */
public static function onRegistration() {
    if ( ExtensionRegistry::getInstance()->isLoaded( 'WikibaseRepository' ) ) {
        return;
    }

    // HACK: Declaring a dependency on Wikibase in extension.json
    // requires more work. See T258822.
    $missingWikibase = [
        'msg' => 'WikibaseMediaInfo requires Wikibase to be installed.',
        'type' => 'missing-phpExtension',
        'missing' => 'Wikibase',
    ];

    throw new ExtensionDependencyError( [ $missingWikibase ] );
}
143
/**
 * Tells whether the given title is an existing page in the file namespace.
 *
 * @param Title|null $title
 * @return bool False when no title is given, the page does not exist, or it is
 *  not in NS_FILE
 */
public static function isMediaInfoPage( ?Title $title = null ) {
    if ( $title === null ) {
        return false;
    }

    // The page must exist and must be a file
    return $title->exists() && $title->inNamespace( NS_FILE );
}
154
/**
 * BeforePageDisplay handler: replaces mediainfo-specific placeholders (if any),
 * moves structured data, and adds data and modules.
 *
 * On redirect pages all MediaInfo markup is stripped from the output and nothing
 * else happens. Otherwise the configured default properties are resolved to their
 * (optional) custom titles and data types, and the work is handed over to
 * doBeforePageDisplay().
 *
 * @param OutputPage $out
 * @param \Skin $skin
 * @throws ConfigException
 * @throws \OOUI\Exception
 */
public function onBeforePageDisplay( $out, $skin ): void {
    $services = MediaWikiServices::getInstance();
    $config = $services->getMainConfig();
    $imgTitle = $out->getTitle();

    // Hide any MediaInfo content and UI on a page, if the target page is a redirect.
    if ( $imgTitle->isRedirect() ) {
        self::deleteMediaInfoData( $out );
        return;
    }

    // Only an existing file page that is being read counts as a MediaInfo page
    $isMediaInfoPage = static::isMediaInfoPage( $imgTitle ) && $out->getActionName() === 'view';

    $defaultProperties = $config->get( 'MediaInfoProperties' );
    $titlesByProperty = [];
    $typesByProperty = [];
    foreach ( $defaultProperties as $name => $propertyId ) {
        try {
            // some properties/statements may have custom titles, in addition to their property
            // label, to help clarify what data is expected there
            // possible messages include:
            // wikibasemediainfo-statements-title-depicts
            $titleMessage = wfMessage( 'wikibasemediainfo-statements-title-' . ( $name ?: '' ) );
            if ( $titleMessage->exists() ) {
                $titlesByProperty[$propertyId] = $titleMessage->text();
            }

            // get data type for values associated with this property
            $typesByProperty[$propertyId] = WBMIHooksHelper::getPropertyType(
                new NumericPropertyId( $propertyId )
            );
        } catch ( PropertyDataTypeLookupException $e ) {
            // ignore invalid properties...
        }
    }

    $userLanguageLookup = new BabelUserLanguageLookup();
    $entityViewFactory = WikibaseRepo::getEntityViewFactory();
    $tempUserConfig = $services->getTempUserConfig();
    $jsConfigVars = [
        'wbmiDefaultProperties' => array_values( $defaultProperties ),
        'wbmiPropertyTitles' => $titlesByProperty,
        'wbmiPropertyTypes' => $typesByProperty,
        'wbmiRepoApiUrl' => wfScript( 'api' ),
        'wbmiHelpUrls' => $config->get( 'MediaInfoHelpUrls' ),
        'wbmiExternalEntitySearchBaseUri' => $config->get( 'MediaInfoExternalEntitySearchBaseUri' ),
        'wbmiSupportedDataTypes' => $config->get( 'MediaInfoSupportedDataTypes' ),
    ];

    ( new self() )->doBeforePageDisplay(
        $out,
        $skin,
        $isMediaInfoPage,
        $userLanguageLookup,
        $entityViewFactory,
        $tempUserConfig,
        $jsConfigVars
    );
}
218
/**
 * Adds the JS config variables, styles and modules for the page, and - on
 * MediaInfo pages - reorganises the output into tabs and exposes the entity data.
 *
 * When $isMediaInfoPage is false only $jsConfigVars is added to the output (with
 * empty module lists). When it is true, the structured data is tabified, the
 * serialized entity and related settings are merged into $jsConfigVars, and the
 * file page modules are loaded.
 *
 * @param OutputPage $out
 * @param \Skin $skin
 * @param bool $isMediaInfoPage
 * @param UserLanguageLookup $userLanguageLookup
 * @param DispatchingEntityViewFactory $entityViewFactory
 * @param TempUserConfig $tempUserConfig
 * @param array $jsConfigVars Variables to expose to JavaScript. May contain a
 *  'wbmiPropertyTypes' map (property id => data type) for the default properties;
 *  a missing entry is treated as an empty map.
 * @throws \OOUI\Exception
 */
public function doBeforePageDisplay(
    $out,
    $skin,
    $isMediaInfoPage,
    UserLanguageLookup $userLanguageLookup,
    DispatchingEntityViewFactory $entityViewFactory,
    TempUserConfig $tempUserConfig,
    array $jsConfigVars = []
) {
    // Site-wide config
    $modules = [];
    $moduleStyles = [];

    if ( $isMediaInfoPage ) {
        OutputPage::setupOOUI();
        $out = $this->tabifyStructuredData( $out, $entityViewFactory );
        $out->setPreventClickjacking( true );
        $imgTitle = $out->getTitle();
        $entityId = MediaInfoServices::getMediaInfoIdLookup()->getEntityIdForTitle( $imgTitle );

        $entityLookup = WikibaseRepo::getEntityLookup();
        $entityRevisionId = $entityLookup->hasEntity( $entityId ) ? $imgTitle->getLatestRevID() : null;
        $entity = $entityLookup->getEntity( $entityId );
        $serializer = WikibaseRepo::getAllTypesEntitySerializer();
        $entityData = ( $entity ? $serializer->serialize( $entity ) : null );

        // Collect the data type of every property used by the existing statements
        // and their qualifiers
        $existingPropertyTypes = [];
        if ( $entity instanceof MediaInfo ) {
            foreach ( $entity->getStatements() as $statement ) {
                $propertyIds = [ $statement->getPropertyId() ];
                foreach ( $statement->getQualifiers() as $qualifierSnak ) {
                    $propertyIds[] = $qualifierSnak->getPropertyId();
                }

                foreach ( $propertyIds as $propertyId ) {
                    try {
                        $existingPropertyTypes[$propertyId->getSerialization()] =
                            WBMIHooksHelper::getPropertyType( $propertyId );
                    } catch ( PropertyDataTypeLookupException $e ) {
                        // ignore when property can't be found - it likely no longer exists;
                        // either way, we can't find what datatype it has, so there's no
                        // useful data to be gathered here
                    }
                }
            }
        }

        $modules[] = 'wikibase.mediainfo.filePageDisplay';
        $moduleStyles[] = 'wikibase.mediainfo.filepage.styles';
        $moduleStyles[] = 'wikibase.mediainfo.statements.styles';

        // $jsConfigVars defaults to an empty array, so the default property type map
        // may be absent; fall back to an empty map rather than applying + to null
        $defaultPropertyTypes = $jsConfigVars['wbmiPropertyTypes'] ?? [];

        $jsConfigVars = array_merge( $jsConfigVars, [
            'wbUserSpecifiedLanguages' => $userLanguageLookup->getAllUserLanguages(
                $out->getUser()
            ),
            'wbCurrentRevision' => $entityRevisionId,
            'wbEntityId' => $entityId->getSerialization(),
            'wbEntity' => $entityData,
            'wbmiMinCaptionLength' => 5,
            'wbmiMaxCaptionLength' => WBMIHooksHelper::getMaxCaptionLength(),
            'wbmiParsedMessageAnonEditWarning' => $out->msg(
                'anoneditwarning',
                // Log-in link
                '{{fullurl:Special:UserLogin|returnto={{FULLPAGENAMEE}}}}',
                // Sign-up link
                '{{fullurl:Special:UserLogin/signup|returnto={{FULLPAGENAMEE}}}}'
            )->parseAsBlock(),
            'wbmiProtectionMsg' => $this->getProtectionMsg( $out ),
            'wbmiShowIPEditingWarning' => !$tempUserConfig->isEnabled(),
            // extend wbmiPropertyTypes (the property type map for all default
            // properties) with property types for existing statements
            'wbmiPropertyTypes' => $defaultPropertyTypes + $existingPropertyTypes,
        ] );

        if ( ExtensionRegistry::getInstance()->isLoaded( 'WikibaseQualityConstraints' ) ) {
            // Don't display constraints violations unless the user is logged in and can edit
            if ( !$out->getUser()->isAnon() && $out->getUser()->probablyCan( 'edit', $imgTitle ) ) {
                $modules[] = 'wikibase.quality.constraints.ui';
                $modules[] = 'wikibase.quality.constraints.icon';
                $jsConfigVars['wbmiDoConstraintCheck'] = true;
            }
        }
    }

    $out->addJsConfigVars( $jsConfigVars );
    $out->addModuleStyles( $moduleStyles );
    $out->addModules( $modules );
}
322
/**
 * Builds the list of languages that may be used in terms.
 * This will be exposed as part of a ResourceLoader package module.
 *
 * @return string[] language codes as keys; values are the names reported by
 *  LanguageNameUtils::getLanguageNames(), or the code itself for terms languages
 *  that have no such name
 */
public static function generateWbTermsLanguages() {
    $services = MediaWikiServices::getInstance();

    $languages = $services->getLanguageNameUtils()->getLanguageNames();
    $termsLanguageCodes = WikibaseRepo::getTermsLanguages( $services )->getLanguages();

    // <code> => <name> entries for known languages take precedence; any additional
    // acceptable language (not known to mediawiki) is appended as <code> => <code>
    $languages += array_combine( $termsLanguageCodes, $termsLanguageCodes );

    return $languages;
}
340
/**
 * Builds the list of languages that may be used in monolingual text.
 * This will be exposed as part of a ResourceLoader package module.
 *
 * @return string[] language codes as keys; values are the names reported by
 *  LanguageNameUtils::getLanguageNames(), or the code itself for monolingual text
 *  languages that have no such name
 */
public static function generateWbMonolingualTextLanguages() {
    $services = MediaWikiServices::getInstance();

    $languages = $services->getLanguageNameUtils()->getLanguageNames();
    $monolingualCodes = WikibaseRepo::getMonolingualTextLanguages( $services )->getLanguages();

    // <code> => <name> entries for known languages take precedence; any additional
    // acceptable language (not known to mediawiki) is appended as <code> => <code>
    $languages += array_combine( $monolingualCodes, $monolingualCodes );

    return $languages;
}
358
/**
 * Rebuilds the page HTML so that the file page content and the structured data
 * are shown in two OOUI tabs.
 *
 * The first tab ('wikiTextPlusCaptions') holds the captions followed by the
 * #mw-imagepage-content wrapper and its .mw-parser-output content; the second tab
 * ('statements') holds the statements. If the expected markup cannot be found,
 * the HTML (minus the slot header) is put back without tabs.
 *
 * @param OutputPage $out
 * @param DispatchingEntityViewFactory $entityViewFactory
 * @return OutputPage $out
 * @throws \OOUI\Exception
 */
private function tabifyStructuredData(
    OutputPage $out,
    DispatchingEntityViewFactory $entityViewFactory
) {
    $html = $out->getHTML();
    $out->clearHTML();
    $textProvider = new MediaWikiLocalizedTextProvider( $out->getLanguage() );

    // The tabs make the slot header redundant, so remove it
    $html = preg_replace( WBMIHooksHelper::getStructuredDataHeaderRegex(), '', $html );

    // Separate the structured data markup from the rest of the page, then snip
    // the captions and statements sections out of it
    $parts = $this->extractStructuredDataHtml( $html, $out, $entityViewFactory );
    $structuredHtml = $parts['structured'];
    $unstructuredHtml = $parts['unstructured'];

    $captions = null;
    if ( preg_match( WBMIHooksHelper::getMediaInfoCaptionsRegex(), $structuredHtml, $captionsMatch ) ) {
        $captions = $captionsMatch[1];
    }

    $statements = null;
    if ( preg_match( WBMIHooksHelper::getMediaInfoStatementsRegex(), $structuredHtml, $statementsMatch ) ) {
        $statements = $statementsMatch[1];
    }

    if ( empty( $captions ) || empty( $statements ) ) {
        // Something has gone wrong - markup should have been created for empty/missing data.
        // Put the html back without tabs (this should not be reachable, it's here just in case)
        $out->addHTML( $html );
        return $out;
    }

    // Prefix the statements with a title for no-js
    $statementsHeading = Html::element(
        'h2',
        [ 'class' => 'wbmi-structured-data-header' ],
        $textProvider->get( 'wikibasemediainfo-filepage-structured-data-heading' )
    );
    $statements = $statementsHeading . $statements;

    // Tab 1 will be everything after (and including) <div id="mw-imagepage-content">
    // except for children of #mw-imagepage-content before .mw-parser-output (e.g. diffs)
    $tab1ContentRegex = '/(<div\b[^>]*\bid=(\'|")mw-imagepage-content\\2[^>]*>)(.*)' .
        '(<div\b[^>]*\bclass=(\'|")[^\'"]+mw-parser-output\\5[^>]*>.*$)/is';

    if ( !preg_match( $tab1ContentRegex, $unstructuredHtml, $tab1Match ) ) {
        // If the div isn't found, something has gone wrong - put the html back without tabs
        // (this should not be reachable, it's here just in case)
        $out->addHTML( $html );
        return $out;
    }

    // Snip out the div, leaving a placeholder (and whatever sat between the wrapper
    // and the parser output) behind
    $html = preg_replace(
        $tab1ContentRegex,
        '$3<WBMI_TABS_PLACEHOLDER>',
        $unstructuredHtml
    );

    // Tab 1 content: no-js title, then the captions, then the image page content
    $captionsHeading = Html::element(
        'h2',
        [ 'class' => 'wbmi-captions-header' ],
        $textProvider->get( 'wikibasemediainfo-filepage-captions-title' )
    );
    $tab1Html = $captionsHeading . $captions . $tab1Match[1] . $tab1Match[4];

    // Prepare tab panels
    $fileInfoTab = new TabPanelLayout(
        'wikiTextPlusCaptions',
        [
            'classes' => [ 'wbmi-tab' ],
            'label' => $textProvider->get( 'wikibasemediainfo-filepage-fileinfo-heading' ),
            'content' => new HtmlSnippet( $tab1Html ),
            'expanded' => false,
        ]
    );
    $statementsTab = new TabPanelLayout(
        'statements',
        [
            'classes' => [ 'wbmi-tab' ],
            'label' => $textProvider->get( 'wikibasemediainfo-filepage-structured-data-heading' ),
            'content' => new HtmlSnippet( $statements ),
            'expanded' => false,
        ]
    );

    $tabs = new IndexLayout( [
        'autoFocus' => false,
        'classes' => [ 'wbmi-tabs' ],
        'expanded' => false,
        'framed' => false,
    ] );
    $tabs->addTabPanels( [ $fileInfoTab, $statementsTab ] );
    // This shouldn't be needed, as this is the first tab, but it is (T340803)
    $tabs->setTabPanel( 'wikiTextPlusCaptions' );
    $tabs->setInfusable( true );

    $tabWrapper = new PanelLayout( [
        'classes' => [ 'wbmi-tabs-container' ],
        'content' => $tabs,
        'expanded' => false,
        'framed' => false,
    ] );

    // Swap the placeholder for the rendered tabs
    $out->addHTML( str_replace( '<WBMI_TABS_PLACEHOLDER>', $tabWrapper, $html ) );

    return $out;
}
486
/**
 * Splits page HTML into its structured and unstructured parts.
 *
 * Returns an array with 2 elements:
 * [
 *     'unstructured' => html output with structured data removed
 *     'structured' => structured data as html ... if there is no structured data,
 *         an empty mediainfoview is used to create the html
 * ]
 *
 * @param string $html
 * @param OutputPage $out
 * @param DispatchingEntityViewFactory $entityViewFactory
 * @return string[]
 */
private function extractStructuredDataHtml(
    $html,
    OutputPage $out,
    DispatchingEntityViewFactory $entityViewFactory
) {
    $viewRegex = WBMIHooksHelper::getMediaInfoViewRegex();

    if ( !preg_match( $viewRegex, $html, $viewMatch ) ) {
        // No structured data on the page: keep the html as is and render an empty view
        return [
            'unstructured' => $html,
            'structured' => $this->createEmptyStructuredData( $out, $entityViewFactory ),
        ];
    }

    return [
        'unstructured' => preg_replace( $viewRegex, '', $html ),
        'structured' => $viewMatch[1],
    ];
}
525
/**
 * Renders the structured data HTML for an empty MediaInfo entity.
 *
 * An entity view is created for the output language (with a fallback chain
 * derived from that language), and the surrounding <mediaInfoView> tag is
 * stripped from its HTML.
 *
 * @param OutputPage $out
 * @param DispatchingEntityViewFactory $entityViewFactory
 * @return string|null Result of the final preg_replace() call
 */
private function createEmptyStructuredData(
    OutputPage $out,
    DispatchingEntityViewFactory $entityViewFactory
) {
    $emptyMediaInfo = new MediaInfo();
    $fallbackChainFactory = new LanguageFallbackChainFactory();
    $language = $out->getLanguage();

    $view = $entityViewFactory->newEntityView(
        $language,
        $fallbackChainFactory->newFromLanguage( $language ),
        $emptyMediaInfo
    );

    /* EntityRevision::UNSAVED_REVISION */
    $unsavedRevision = 0;
    $viewHtml = $view->getContent( $emptyMediaInfo, $unsavedRevision )->getHtml();

    // Strip out the surrounding <mediaInfoView> tag
    return preg_replace( WBMIHooksHelper::getMediaInfoViewRegex(), '$1', $viewHtml );
}
553
/**
 * Removes all MediaInfo data from the output.
 *
 * Both the MediaInfo view markup and the structured data header are stripped
 * from the current HTML, which is then written back to $out.
 *
 * @param OutputPage $out
 * @return OutputPage The same $out instance
 */
private static function deleteMediaInfoData( $out ) {
    $html = $out->getHTML();
    $out->clearHTML();

    // Patterns are applied in order: first the view, then the header
    $mediaInfoPatterns = [
        WBMIHooksHelper::getMediaInfoViewRegex(),
        WBMIHooksHelper::getStructuredDataHeaderRegex(),
    ];
    $out->addHTML( preg_replace( $mediaInfoPatterns, '', $html ) );

    return $out;
}
568
/**
 * Builds the message to show the user when this file is protected.
 *
 * The checks run in the order full protection, semi-protection, cascading
 * protection; a later matching check replaces the message of an earlier one.
 *
 * Passing the message HTML to JS may not be ideal, but some messages are
 * templates and template syntax isn't supported in JS. See
 * https://www.mediawiki.org/wiki/Manual:Messages_API#Using_messages_in_JavaScript.
 *
 * @param OutputPage $out
 * @return string|null Parsed message, or null if none of the checks matched
 */
private function getProtectionMsg( $out ) {
    $services = MediaWikiServices::getInstance();
    $restrictionStore = $services->getRestrictionStore();
    $imgTitle = $out->getTitle();

    $protectionMsg = null;

    // Full protection.
    $fullyProtected = $restrictionStore->isProtected( $imgTitle, 'edit' ) &&
        !$restrictionStore->isSemiProtected( $imgTitle, 'edit' );
    if ( $fullyProtected ) {
        $protectionMsg = $out->msg( 'protectedpagetext', 'editprotected', 'edit' )->parseAsBlock();
    }

    // Semi-protection.
    if ( $restrictionStore->isSemiProtected( $imgTitle, 'edit' ) ) {
        $protectionMsg = $out->msg( 'protectedpagetext', 'editsemiprotected', 'edit' )->parseAsBlock();
    }

    // Cascading protection.
    if ( $restrictionStore->isCascadeProtected( $imgTitle ) ) {
        // Get the protected page(s) causing this file to be protected.
        $cascadeSources = $restrictionStore->getCascadeProtectionSources( $imgTitle )[0];
        $titleFormatter = $services->getTitleFormatter();

        // One wikitext list item per source page
        $sourceLines = [];
        foreach ( $cascadeSources as $pageIdentity ) {
            $sourceLines[] = '* [[:' . $titleFormatter->getPrefixedText( $pageIdentity ) . "]]\n";
        }
        $sources = implode( '', $sourceLines );

        $protectionMsg = $out->msg( 'cascadeprotected', count( $cascadeSources ), $sources )
            ->parseAsBlock();
    }

    return $protectionMsg;
}
613
/**
 * Hook handler that wraps the given lookup in a MediaInfoByLinkedTitleLookup.
 *
 * @param EntityByLinkedTitleLookup &$lookup Replaced in place by the wrapping lookup
 */
public static function onGetEntityByLinkedTitleLookup( EntityByLinkedTitleLookup &$lookup ) {
    $wrappedLookup = $lookup;
    $lookup = new MediaInfoByLinkedTitleLookup( $wrappedLookup );
}
617
/**
 * Hook handler that sets the MediaInfo content model for titles in the file
 * namespace that have an article ID; other titles are left untouched.
 *
 * @param Title $title
 * @param string &$contentModel Overwritten with MediaInfoContent::CONTENT_MODEL_ID
 *  when the title qualifies
 */
public static function onGetEntityContentModelForTitle( Title $title, &$contentModel ) {
    if ( !$title->inNamespace( NS_FILE ) ) {
        return;
    }

    if ( !$title->getArticleID() ) {
        return;
    }

    $contentModel = MediaInfoContent::CONTENT_MODEL_ID;
}
623
/**
 * Register a ProfileContext for cirrus that will mean that queries in NS_FILE will use
 * the MediaQueryBuilder class for searching
 *
 * Does nothing unless $wgWBCSUseCirrus is truthy. The fulltext profile is the first
 * key of the MediaInfoMediaSearchProfiles config, unless the request contains a
 * parameter named after one of the configured profiles, in which case the last
 * such profile is used.
 *
 * @param SearchProfileService $service
 */
public static function onCirrusSearchProfileService( SearchProfileService $service ) {
    global $wgWBCSUseCirrus;
    if ( !$wgWBCSUseCirrus ) {
        // avoid leaking into CirrusSearch test suite, where $wgWBCSUseCirrus
        // will be false
        return;
    }

    // Register the query builder profiles so that they are usable in interleaved A/B test
    $profileRepositories = [
        [ SearchProfileService::FT_QUERY_BUILDER, '/Search/MediaSearchProfiles.php' ],
        [ SearchProfileService::RESCORE, '/Search/MediaSearchRescoreProfiles.php' ],
        [ SearchProfileService::RESCORE_FUNCTION_CHAINS, '/Search/MediaSearchRescoreFunctionChains.php' ],
    ];
    foreach ( $profileRepositories as [ $profileType, $relativePath ] ) {
        $service->registerFileRepository(
            $profileType,
            // this string is to prevent overwriting, not used for retrieval
            'mediainfo_base',
            __DIR__ . $relativePath
        );
    }

    $contextName = MediaSearchQueryBuilder::SEARCH_PROFILE_CONTEXT_NAME;

    // Need to register a rescore profile for the profile context
    $service->registerDefaultProfile(
        SearchProfileService::RESCORE,
        $contextName,
        'classic_noboostlinks_max_boost_template'
    );

    $request = RequestContext::getMain()->getRequest();
    $config = MediaWikiServices::getInstance()->getMainConfig();
    $profiles = array_keys( $config->get( 'MediaInfoMediaSearchProfiles' ) ?: [] );
    if ( !$profiles ) {
        return;
    }

    // first profile is the default mediasearch profile
    $fulltextProfileName = $profiles[0];
    foreach ( $profiles as $candidate ) {
        // switch to non-default implementations (only) when explicitly requested
        if ( $request->getCheck( $candidate ) ) {
            $fulltextProfileName = $candidate;
        }
    }

    $service->registerDefaultProfile(
        SearchProfileService::FT_QUERY_BUILDER,
        $contextName,
        $fulltextProfileName
    );

    $service->registerFTSearchQueryRoute(
        $contextName,
        1,
        // only for NS_FILE searches
        [ NS_FILE ],
        // only when the search query is found to be something mediasearch
        // is capable of dealing with (as determined by MediaSearchASTClassifier)
        [ $fulltextProfileName ]
    );
}
691
692    public static function onCirrusSearchRegisterFullTextQueryClassifiers(
693        ParsedQueryClassifiersRepository $repository
694    ) {
695        $mwServices = MediaWikiServices::getInstance();
696        $config = $mwServices->getMainConfig();
697        $profiles = array_keys( $config->get( 'MediaInfoMediaSearchProfiles' ) ?: [] );
698        $repository->registerClassifier( new MediaSearchASTClassifier( $profiles ) );
699    }
700
701    /**
702     * Handler for the GetPreferences hook
703     *
704     * @param User $user
705     * @param array[] &$preferences
706     */
707    public function onGetPreferences( $user, &$preferences ) {
708        $preferences['wbmi-cc0-confirmed'] = [
709            'type' => 'api'
710        ];
711
712        $preferences['wbmi-wikidata-link-notice-dismissed'] = [
713            'type' => 'api'
714        ];
715    }
716
717    /**
718     * External libraries for Scribunto
719     *
720     * @param string $engine
721     * @param string[] &$extraLibraries
722     */
723    public static function onScribuntoExternalLibraries( $engine, array &$extraLibraries ) {
724        if ( !ExtensionRegistry::getInstance()->isLoaded( 'WikibaseClient' ) ) {
725            return;
726        }
727        $allowDataTransclusion = WikibaseClient::getSettings()->getSetting( 'allowDataTransclusion' );
728        if ( $engine === 'lua' && $allowDataTransclusion === true ) {
729            $extraLibraries['mw.wikibase.mediainfo'] = WikibaseMediaInfoLibrary::class;
730            $extraLibraries['mw.wikibase.mediainfo.entity'] = [
731                'class' => WikibaseMediaInfoEntityLibrary::class,
732                'deferLoad' => true,
733            ];
734        }
735    }
736
737    /**
738     * @param RevisionRecord $revision
739     * @param ?int $oldPageID
740     */
741    public function onRevisionUndeleted( $revision, $oldPageID ) {
742        $title = Title::newFromLinkTarget( $revision->getPageAsLinkTarget() );
743        if ( !$title->inNamespace( NS_FILE ) ) {
744            // short-circuit if we're not even dealing with a file
745            return;
746        }
747
748        if ( !$revision->hasSlot( 'mediainfo' ) ) {
749            // no mediainfo content found
750            return;
751        }
752
753        $mwServices = MediaWikiServices::getInstance();
754        $dbw = $mwServices->getDBLoadBalancerFactory()->getPrimaryDatabase();
755        $blobStore = $mwServices->getBlobStoreFactory()->newSqlBlobStore();
756        $statementGuidParser = WikibaseRepo::getStatementGuidParser( $mwServices );
757
758        // fetch existing entity data from old revision
759        $slot = $revision->getSlot( 'mediainfo', RevisionRecord::RAW );
760        $existingContentId = $slot->getContentId();
761        $existingContent = $slot->getContent();
762        if ( !( $existingContent instanceof MediaInfoContent ) ) {
763            return;
764        }
765        $existingEntity = $existingContent->getEntity();
766        $existingEntityId = $existingEntity->getId();
767
768        // generate actual correct entity id for this title
769        $entityIdLookup = MediaInfoServices::getMediaInfoIdLookup();
770        $newEntityId = $entityIdLookup->getEntityIdForTitle( $title );
771        if ( $existingEntityId === null || $newEntityId === null || $existingEntityId->equals( $newEntityId ) ) {
772            return;
773        }
774
775        // create new content object with the same content, but this id
776        $newEntity = $existingEntity->copy();
777        $newEntity->setId( $newEntityId );
778        foreach ( $newEntity->getStatements()->toArray() as $statement ) {
779            // statement GUIDs also contain the M-id, so let's go fix those too
780            $existingStatementGuidString = $statement->getGuid();
781            // cast GUID to non-null for Phan (we know it exists)
782            '@phan-var string $existingStatementGuidString';
783            $existingStatementGuid = $statementGuidParser->parse( $existingStatementGuidString );
784            if ( !$newEntityId->equals( $existingStatementGuid->getEntityId() ) ) {
785                $newStatementGuid = new StatementGuid( $newEntityId, $existingStatementGuid->getGuidPart() );
786                $statement->setGuid( (string)$newStatementGuid );
787            }
788        }
789        $newContent = new MediaInfoContent( new EntityInstanceHolder( $newEntity ) );
790
791        // store updated content in blob store
792        $unsavedSlot = SlotRecord::newUnsaved( 'mediainfo', $newContent );
793        $blobAddress = $blobStore->storeBlob(
794            $newContent->serialize( $newContent->getDefaultFormat() ),
795            [
796                BlobStore::PAGE_HINT => $revision->getPageId(),
797                BlobStore::REVISION_HINT => $revision->getId(),
798                BlobStore::PARENT_HINT => $revision->getParentId(),
799                BlobStore::DESIGNATION_HINT => 'page-content',
800                BlobStore::ROLE_HINT => $unsavedSlot->getRole(),
801                BlobStore::SHA1_HINT => $unsavedSlot->getSha1(),
802                BlobStore::MODEL_HINT => $newContent->getModel(),
803                BlobStore::FORMAT_HINT => $newContent->getDefaultFormat(),
804            ]
805        );
806
807        // update content record to point to new, corrected, content blob
808        $dbw->newUpdateQueryBuilder()
809            ->update( 'content' )
810            ->set( [
811                'content_size' => $unsavedSlot->getSize(),
812                'content_sha1' => $unsavedSlot->getSha1(),
813                'content_address' => $blobAddress,
814            ] )
815            ->where( [
816                'content_id' => $existingContentId,
817            ] )
818            ->caller( __METHOD__ )
819            ->execute();
820    }
821
822    /**
823     * @param Title $title
824     * @param bool $create
825     * @param string $comment
826     * @param int $oldPageId
827     * @param array $restoredPages
828     */
829    public function onArticleUndelete( $title, $create, $comment, $oldPageId, $restoredPages ) {
830        if ( !$title->inNamespace( NS_FILE ) || $oldPageId === $title->getArticleID() ) {
831            return;
832        }
833
834        // above onArticleRevisionUndeleted hook has been fixing MediaInfo ids
835        // for every undeleted revision, but now that that process is done, we
836        // need to clear the parser caches that (may have) been created during
837        // the undelete process as they were based on incorrect entities
838        $page = MediaWikiServices::getInstance()->getWikiPageFactory()->newFromTitle( $title );
839        $page->updateParserCache( [ 'causeAction' => 'mediainfo-id-splitting' ] );
840    }
841
842    /**
843     * Add Concept URI link to the toolbox section of the sidebar.
844     *
845     * @param Skin $skin
846     * @param string[] &$sidebar
847     * @return void
848     */
849    public function onSidebarBeforeOutput( $skin, &$sidebar ): void {
850        $title = $skin->getTitle();
851        if ( !static::isMediaInfoPage( $title ) ) {
852            return;
853        }
854
855        $entityId = MediaInfoServices::getMediaInfoIdLookup()->getEntityIdForTitle( $title );
856        if ( $entityId === null ) {
857            return;
858        }
859
860        $baseConceptUri = WikibaseRepo::getLocalEntitySource()
861            ->getConceptBaseUri();
862
863        $sidebar['TOOLBOX']['wb-concept-uri'] = [
864            'id' => 't-wb-concept-uri',
865            'text' => $skin->msg( 'wikibase-concept-uri' )->text(),
866            'href' => $baseConceptUri . $entityId->getSerialization(),
867            'title' => $skin->msg( 'wikibase-concept-uri-tooltip' )->text()
868        ];
869    }
870
871    /**
872     * Add extra cirrus search query features for wikibase
873     *
874     * @param \CirrusSearch\SearchConfig $config (not used, required by hook)
875     * @param array &$extraFeatures
876     */
877    public static function onCirrusSearchAddQueryFeatures( $config, array &$extraFeatures ) {
878        $featureConfig = MediaWikiServices::getInstance()->getMainConfig()
879            ->get( 'MediaInfoCustomMatchFeature' );
880        if ( $featureConfig ) {
881            $extraFeatures[] = new CustomMatchFeature( $featureConfig );
882        }
883    }
884
885    /**
886     * @param RenderedRevision $renderedRevision
887     * @param UserIdentity $author
888     * @param CommentStoreComment $summary
889     * @param int $flags
890     * @param Status $hookStatus
891     */
892    public function onMultiContentSave(
893        $renderedRevision,
894        $author,
895        $summary,
896        $flags,
897        $hookStatus
898    ) {
899        if ( ( $flags & EDIT_AUTOSUMMARY ) !== 0 && $renderedRevision->getRevision()->hasSlot( 'mediainfo' ) ) {
900            // remove coordinates from edit summaries when deleting location statements
901            // @see https://phabricator.wikimedia.org/T298700
902            $coordinate = '\d+°(\d+\'(\d+(\.\d+)?")?)?';
903            $summary->text = preg_replace(
904                "/(\/\* wbremoveclaims-remove:.+? \*\/ .+?): {$coordinate}[NS], {$coordinate}[EW]/u",
905                '$1',
906                $summary->text
907            );
908        }
909    }
910}