Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 108 |
|
0.00% |
0 / 5 |
CRAP | |
0.00% |
0 / 1 |
UpdateWeightedTags | |
0.00% |
0 / 101 |
|
0.00% |
0 / 5 |
2162 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
20 | |||
validateParams | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
420 | |||
getPageIdentities | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
72 | |||
readLineBatch | |
0.00% |
0 / 33 |
|
0.00% |
0 / 1 |
182 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Maintenance; |
4 | |
5 | use CirrusSearch\CirrusSearch; |
6 | use Generator; |
7 | use MediaWiki\MediaWikiServices; |
8 | use MediaWiki\Page\ProperPageIdentity; |
9 | use MediaWiki\Title\MalformedTitleException; |
10 | use MediaWiki\Title\Title; |
11 | use SplFileObject; |
12 | |
13 | /** |
14 | * Update the weighted_tags field for a page for a specific tag. |
15 | * |
16 | * This program is free software; you can redistribute it and/or modify |
17 | * it under the terms of the GNU General Public License as published by |
18 | * the Free Software Foundation; either version 2 of the License, or |
19 | * (at your option) any later version. |
20 | * |
21 | * This program is distributed in the hope that it will be useful, |
22 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
23 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
24 | * GNU General Public License for more details. |
25 | * |
26 | * You should have received a copy of the GNU General Public License along |
27 | * with this program; if not, write to the Free Software Foundation, Inc., |
28 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
29 | * http://www.gnu.org/copyleft/gpl.html |
30 | */ |
31 | |
// Locate MediaWiki core: honour MW_INSTALL_PATH when it is set in the
// environment, otherwise fall back to the path three levels above this file.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = ( $IP !== false ) ? $IP : __DIR__ . '/../../..';

// Load the core Maintenance base class first, then the CirrusSearch one
// that UpdateWeightedTags extends.
require_once "$IP/maintenance/Maintenance.php";
require_once __DIR__ . '/../includes/Maintenance/Maintenance.php';
38 | |
/**
 * Maintenance script that updates or resets the weighted_tags field of one or
 * more pages for a single tag type.
 *
 * The pages to act on are selected with exactly one of --page, --page-list or
 * --pageid-list.
 */
class UpdateWeightedTags extends Maintenance {
	/**
	 * Register the command line options and set the default batch size.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( "Update the weighted_tags field for a page for a specific tag." );
		$this->addOption( 'page', 'Page title', false, true );
		$this->addOption( 'page-list', 'Path to file with a list of page titles, one per line.', false, true );
		$this->addOption( 'pageid-list', 'Path to file with a list of page IDs, one per line.', false, true );
		$this->addOption( 'tagType', "Tag type. A string such as 'recommendation.link'.", true, true );
		$this->addOption( 'tagName', "Tag name. Some tag types don't use this.", false, true, false, true );
		// The advertised range matches what validateParams() enforces (1-1000).
		$this->addOption( 'weight', "Weight (1-1000). Some tag types don't use this. When used, must occur the same number of"
			. " times as --tagName and will be matched by position.", false, true, false, true );
		$this->addOption( 'reset', 'Reset a tag type (remove all tags belonging to it). Cannot be mixed with --tagName and --weight.' );
		$this->addOption( 'verbose', 'Verbose output.' );
		$this->setBatchSize( 50 );
	}

	/**
	 * Validate the options, then update (or reset, with --reset) the weighted
	 * tags of every selected page.
	 */
	public function execute() {
		$this->validateParams();

		// Everything derived from the command line is the same for every page,
		// so compute it once instead of once per page.
		$tagPrefix = $this->getOption( 'tagType' );
		$reset = $this->hasOption( 'reset' );
		$tagNames = null;
		$tagWeights = null;
		if ( !$reset ) {
			$tagNames = $this->getOption( 'tagName' );
			$tagWeights = $this->getOption( 'weight' );
			if ( $tagWeights !== null ) {
				// validateParams() guarantees that --tagName is present and has
				// the same number of entries, so the positional pairing is safe.
				$tagWeights = array_combine( $tagNames, array_map( 'intval', $tagWeights ) );
			}
		}

		$cirrusSearch = new CirrusSearch();
		foreach ( $this->getPageIdentities() as $pageIdentity ) {
			if ( $reset ) {
				$cirrusSearch->resetWeightedTags( $pageIdentity, $tagPrefix );
			} else {
				$cirrusSearch->updateWeightedTags( $pageIdentity, $tagPrefix, $tagNames, $tagWeights );
			}
		}
	}

	/**
	 * Check the combination and content of the command line options and abort
	 * with fatalError() on the first problem found.
	 */
	private function validateParams(): void {
		$pageOptionCount = (int)$this->hasOption( 'page' ) + (int)$this->hasOption( 'page-list' )
			+ (int)$this->hasOption( 'pageid-list' );
		if ( $pageOptionCount !== 1 ) {
			$this->fatalError( "Exactly one of --page, --page-list and --pageid-list must be used" );
		} elseif ( $this->hasOption( 'page-list' ) && !is_readable( $this->getOption( 'page-list' ) ) ) {
			$this->fatalError( 'Cannot read page list from ' . $this->getOption( 'page-list' ) );
		} elseif ( $this->hasOption( 'pageid-list' ) && !is_readable( $this->getOption( 'pageid-list' ) ) ) {
			// Report the path that was actually checked (--pageid-list, not --page-list).
			$this->fatalError( 'Cannot read page ID list from ' . $this->getOption( 'pageid-list' ) );
		}

		if ( strpos( $this->getOption( 'tagType' ), '/' ) !== false ) {
			$this->fatalError( 'The tag type cannot contain a / character' );
		}

		if ( $this->hasOption( 'reset' ) ) {
			if ( $this->hasOption( 'tagName' ) || $this->hasOption( 'weight' ) ) {
				$this->fatalError( '--reset cannot be used with --tagName or --weight' );
			}
			return;
		}

		$tagNames = $this->getOption( 'tagName' );
		$tagWeights = $this->getOption( 'weight' );

		if ( $tagNames === null ) {
			if ( $tagWeights !== null ) {
				$this->fatalError( '--weight should be used together with --tagName' );
			}
			return;
		}

		if ( $tagWeights && count( $tagNames ) !== count( $tagWeights ) ) {
			$this->fatalError( 'When --weight is used, it must occur the same number of times as --tagName' );
		}
		foreach ( $tagNames as $tagName ) {
			if ( strpos( $tagName, '|' ) !== false ) {
				$this->fatalError( "Wrong tag name '$tagName': cannot contain | character" );
			}
		}
		foreach ( $tagWeights ?? [] as $tagWeight ) {
			// ctype_digit() rejects signs, decimals and empty strings before
			// the numeric range check is applied.
			if ( !ctype_digit( $tagWeight ) || ( $tagWeight < 1 ) || ( $tagWeight > 1000 ) ) {
				$this->fatalError( "Wrong tag weight '$tagWeight': must be an integer between 1 and 1000" );
			}
		}
	}

	/**
	 * Yield the pages selected on the command line, either the single --page
	 * title or every valid entry of the --page-list / --pageid-list file.
	 *
	 * @return Generator<ProperPageIdentity>
	 */
	private function getPageIdentities(): Generator {
		if ( $this->hasOption( 'page' ) ) {
			$pageName = $this->getOption( 'page' );
			$title = Title::newFromText( $pageName );
			if ( !$title ) {
				$this->fatalError( "Invalid title $pageName" );
			} elseif ( !$title->canExist() ) {
				$this->fatalError( "$pageName is not a proper page" );
			} elseif ( !$title->exists() ) {
				$this->fatalError( "$pageName does not exist" );
			}
			// Drop any #fragment before converting the title to a page identity.
			if ( $title->hasFragment() ) {
				$title->setFragment( '' );
			}
			yield $title->toPageIdentity();
			return;
		}

		$useIds = $this->hasOption( 'pageid-list' );
		$file = new SplFileObject( $this->getOption( $useIds ? 'pageid-list' : 'page-list' ) );
		foreach ( $this->readLineBatch( $file, $useIds ) as $pageIdentities ) {
			yield from $pageIdentities;
		}
	}

	/**
	 * Read lines from the given file and yield the pages they refer to in
	 * batches built from up to $batchSize valid lines each.
	 *
	 * Blank lines are skipped silently; lines that are not a valid page ID or
	 * page title are reported with error() and skipped.
	 *
	 * @param SplFileObject $file
	 * @param bool $useIds Is the file a list of page IDs or titles?
	 * @return Generator<ProperPageIdentity[]>
	 */
	private function readLineBatch( SplFileObject $file, bool $useIds ): Generator {
		$services = MediaWikiServices::getInstance();
		$titleParser = $services->getTitleParser();
		$pageStore = $services->getPageStore();
		$linkBatchFactory = $services->getLinkBatchFactory();
		$batchSize = $this->getBatchSize();
		$identifiers = [];
		$logNext = true;
		// Keep looping after EOF while identifiers are still pending, so the
		// final partial batch gets flushed.
		while ( !$file->eof() || $identifiers ) {
			if ( count( $identifiers ) >= $batchSize || $file->eof() ) {
				if ( $useIds ) {
					yield $pageStore->newSelectQueryBuilder()->wherePageIds( $identifiers )
						->fetchPageRecordArray();
				} else {
					$linkBatch = $linkBatchFactory->newLinkBatch( $identifiers );
					$linkBatch->execute();
					yield $linkBatch->getPageIdentities();
				}
				$identifiers = [];
				$logNext = true;
			}
			if ( $file->eof() ) {
				break;
			}
			$line = trim( $file->fgets() );
			// be forgiving with trailing empty lines
			if ( $line === '' ) {
				continue;
			}
			if ( $useIds ) {
				// Page IDs must be positive integers without leading zeros.
				if ( !preg_match( '/^[1-9]\d*$/', $line ) ) {
					$this->error( "Invalid page ID: $line\n" );
					continue;
				}
				$identifiers[] = (int)$line;
			} else {
				try {
					$identifiers[] = $titleParser->parseTitle( $line );
				} catch ( MalformedTitleException $e ) {
					$this->error( "Invalid page title: $line\n" );
					continue;
				}
			}
			// With --verbose, announce each batch once, using its first valid line.
			if ( $logNext && $this->hasOption( 'verbose' ) ) {
				$this->output( 'Processing batch starting with ' . $line . PHP_EOL );
				$logNext = false;
			}
		}
	}
}
206 | |
// Name the maintenance class defined in this file, then include the file
// named by the RUN_MAINTENANCE_IF_MAIN constant.
$maintClass = UpdateWeightedTags::class;
require_once RUN_MAINTENANCE_IF_MAIN;