Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 100
0.00% covered (danger)
0.00%
0 / 2
CRAP
0.00% covered (danger)
0.00%
0 / 1
RefreshGlobalimagelinks
0.00% covered (danger)
0.00%
0 / 94
0.00% covered (danger)
0.00%
0 / 2
420
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
2
 execute
0.00% covered (danger)
0.00%
0 / 88
0.00% covered (danger)
0.00%
0 / 1
380
1<?php
2/**
3 * Maintenance script to populate the globalimagelinks table. Needs to be run
4 * on all wikis.
5 */
// Locate the MediaWiki installation. MW_INSTALL_PATH takes precedence when it
// is set; otherwise fall back to the directory three levels above the
// directory containing this script.
$path = getenv( 'MW_INSTALL_PATH' );
if ( $path === false ) {
    $path = dirname( __DIR__, 3 );
}

require_once "$path/maintenance/Maintenance.php";
13
14use MediaWiki\Extension\GlobalUsage\GlobalUsage;
15use MediaWiki\MediaWikiServices;
16use MediaWiki\Title\Title;
17use MediaWiki\WikiMap\WikiMap;
18
/**
 * Maintenance script that refreshes this wiki's rows in the globalimagelinks
 * table.
 *
 * Two independent modes can be requested through the required --pages option
 * (comma-separated):
 *  - "existing": walk the local page table in batches, drop the stored global
 *    links of every page seen and re-insert the links to images that have no
 *    local `image` row.
 *  - "nonexisting": walk the globalimagelinks rows recorded for this wiki and
 *    delete those whose page no longer has a local `page` row.
 */
class RefreshGlobalimagelinks extends Maintenance {
    /**
     * Declare the command line options and the default batch size.
     */
    public function __construct() {
        parent::__construct();
        // Both continuation options are only meaningful with a value, so they
        // are declared as taking an argument.
        $this->addOption( 'start-page', 'page_id of the page to start with', false, true );
        $this->addOption( 'start-image', 'il_to of the image to start with', false, true );
        $this->addOption( 'pages', 'CSV of (existing,nonexisting)', true, true );
        $this->setBatchSize( 500 );

        $this->requireExtension( 'Global Usage' );
    }

    /**
     * Run the requested refresh modes.
     *
     * The "existing" pass is resumable via --start-page / --start-image, which
     * seed the (page_id, il_to) continuation tuple. After every batch that
     * made progress the script commits and waits for replication.
     */
    public function execute() {
        // Requested modes. This list must stay untouched for the whole run:
        // it is consulted again after the "existing" pass has finished, so the
        // per-batch working data below lives in a separate variable.
        $modes = array_map( 'trim', explode( ',', $this->getOption( 'pages' ) ) );

        $services = MediaWikiServices::getInstance();
        $dbr = $services->getConnectionProvider()->getReplicaDatabase();
        $gdbw = GlobalUsage::getGlobalDB( DB_PRIMARY );
        $gdbr = GlobalUsage::getGlobalDB( DB_REPLICA );
        $wikiId = WikiMap::getCurrentWikiId();
        $gu = new GlobalUsage( $wikiId, $gdbw, $gdbr );

        $lbFactory = $services->getDBLoadBalancerFactory();
        $ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ );
        $batchSize = $this->getBatchSize();

        // Clean up links for existing pages...
        if ( in_array( 'existing', $modes, true ) ) {
            $lastPageId = intval( $this->getOption( 'start-page', 0 ) );
            $lastIlTo = $this->getOption( 'start-image' );

            do {
                $this->output( "Querying links after (page_id, il_to) = ($lastPageId, $lastIlTo)\n" );

                # Query all pages and any imagelinks associated with them
                $res = $dbr->newSelectQueryBuilder()
                    ->select( [
                        'page_id', 'page_namespace', 'page_title',
                        'il_to', 'img_name'
                    ] )
                    ->from( 'page' )
                    // LEFT JOIN imagelinks since we need to delete usage
                    // from all pages, even if they don't have images anymore
                    ->leftJoin( 'imagelinks', null, 'page_id = il_from' )
                    // Check to see if images exist locally
                    ->leftJoin( 'image', null, 'il_to = img_name' )
                    ->where( $dbr->buildComparison( '>', [
                        'page_id' => $lastPageId,
                        'il_to' => $lastIlTo,
                    ] ) )
                    ->orderBy( $dbr->implicitOrderby() ? 'page_id' : 'page_id, il_to' )
                    ->limit( $batchSize )
                    ->caller( __METHOD__ )
                    ->fetchResultSet();

                # Group the batch by page: page_id => [ il_to => row ].
                # Every page seen gets an entry, even when it has no links
                # left to insert, so that its old links are still deleted.
                $linksByPage = [];
                $lastRow = null;
                foreach ( $res as $row ) {
                    if ( !isset( $linksByPage[$row->page_id] ) ) {
                        $linksByPage[$row->page_id] = [];
                    }
                    # Only keep the imagelinks entry if the image does not
                    # exist locally
                    if ( $row->il_to !== null && $row->img_name === null ) {
                        $linksByPage[$row->page_id][$row->il_to] = $row;
                    }
                    $lastRow = $row;
                }

                # Insert the imagelinks data to the global table
                foreach ( $linksByPage as $pageId => $rows ) {
                    # Delete all original links unless this page is a
                    # continuation of the last iteration (its links were
                    # already deleted then).
                    if ( (int)$pageId !== $lastPageId ) {
                        $gu->deleteLinksFromPage( $pageId );
                    }
                    if ( $rows ) {
                        $title = Title::newFromRow( reset( $rows ) );
                        $images = array_keys( $rows );
                        # Since we have a pretty accurate page_id, don't specify
                        # IDBAccessObject::READ_LATEST
                        $gu->insertLinks( $title, $images, /* $flags */ 0 );
                    }
                }

                if ( $lastRow ) {
                    # We've processed some rows in this iteration, so save
                    # continuation variables. The page id is normalised to an
                    # int so the continuation check above can compare strictly.
                    $lastPageId = (int)$lastRow->page_id;
                    $lastIlTo = $lastRow->il_to;

                    # Be nice to the database
                    $lbFactory->commitAndWaitForReplication( __METHOD__, $ticket );
                }
            } while ( $lastRow !== null );
        }

        // Clean up broken links from pages that no longer exist...
        if ( in_array( 'nonexisting', $modes, true ) ) {
            $lastPageId = 0;
            while ( true ) {
                $this->output( "Querying for broken links after (page_id) = ($lastPageId)\n" );

                # Next batch of page ids recorded for this wiki in the global table
                $res = $gdbw->newSelectQueryBuilder()
                    ->select( 'gil_page' )
                    ->from( 'globalimagelinks' )
                    ->where( [
                        'gil_wiki' => $wikiId,
                        $gdbw->expr( 'gil_page', '>', $lastPageId ),
                    ] )
                    ->orderBy( 'gil_page' )
                    ->limit( $batchSize )
                    ->caller( __METHOD__ )
                    ->fetchResultSet();

                if ( !$res->numRows() ) {
                    break;
                }

                # page_id => whether the page exists locally; assume missing
                # until the local lookup below says otherwise
                $pageIds = [];
                foreach ( $res as $row ) {
                    $pageIds[$row->gil_page] = false;
                    $lastPageId = (int)$row->gil_page;
                }

                $lres = $dbr->newSelectQueryBuilder()
                    ->select( 'page_id' )
                    ->from( 'page' )
                    ->where( [ 'page_id' => array_keys( $pageIds ) ] )
                    ->caller( __METHOD__ )
                    ->fetchResultSet();

                foreach ( $lres as $row ) {
                    $pageIds[$row->page_id] = true;
                }

                $deleted = 0;
                foreach ( $pageIds as $pageId => $exists ) {
                    if ( !$exists ) {
                        $gu->deleteLinksFromPage( $pageId );
                        ++$deleted;
                    }
                }

                # Only wait for replication when this batch actually wrote something
                if ( $deleted > 0 ) {
                    $lbFactory->commitAndWaitForReplication( __METHOD__, $ticket );
                }
            }
        }
    }
}
169
// Name the maintenance class defined in this file, then include the file
// named by the RUN_MAINTENANCE_IF_MAIN constant (defined outside this file).
$maintClass = RefreshGlobalimagelinks::class;
require_once RUN_MAINTENANCE_IF_MAIN;