Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
4.97% |
8 / 161 |
|
0.00% |
0 / 12 |
CRAP | |
0.00% |
0 / 1 |
RunSearch | |
5.19% |
8 / 154 |
|
0.00% |
0 / 12 |
1770.53 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
2 | |||
finalSetup | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
loadChangeableConfigVars | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
12 | |||
applyGlobals | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
56 | |||
changeGlobalKeyPath | |
80.00% |
8 / 10 |
|
0.00% |
0 / 1 |
5.20 | |||
consume | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
56 | |||
processResultSet | |
0.00% |
0 / 27 |
|
0.00% |
0 / 1 |
30 | |||
processSuggestionSet | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
6 | |||
processArchiveResult | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
6 | |||
searchArchive | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
searchFor | |
0.00% |
0 / 29 |
|
0.00% |
0 / 1 |
90 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Maintenance; |
4 | |
5 | use CirrusSearch\CirrusDebugOptions; |
6 | use CirrusSearch\CirrusSearch; |
7 | use CirrusSearch\HashSearchConfig; |
8 | use CirrusSearch\Search\CirrusSearchResultSet; |
9 | use CirrusSearch\SearchConfig; |
10 | use MediaWiki\Maintenance\OrderedStreamingForkController; |
11 | use MediaWiki\MediaWikiServices; |
12 | use MediaWiki\Settings\SettingsBuilder; |
13 | use MediaWiki\Status\Status; |
14 | use PageArchive; |
15 | use SearchSuggestionSet; |
16 | use Wikimedia\Rdbms\IResultWrapper; |
17 | |
18 | /** |
19 | * Run search queries provided on stdin |
20 | * |
21 | * This program is free software; you can redistribute it and/or modify |
22 | * it under the terms of the GNU General Public License as published by |
23 | * the Free Software Foundation; either version 2 of the License, or |
24 | * (at your option) any later version. |
25 | * |
26 | * This program is distributed in the hope that it will be useful, |
27 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
29 | * GNU General Public License for more details. |
30 | * |
31 | * You should have received a copy of the GNU General Public License along |
32 | * with this program; if not, write to the Free Software Foundation, Inc., |
33 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
34 | * http://www.gnu.org/copyleft/gpl.html |
35 | */ |
36 | |
// Resolve the MediaWiki installation root: an explicit MW_INSTALL_PATH wins,
// otherwise fall back to three directories above this script.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = ( $IP !== false ) ? $IP : __DIR__ . '/../../..';

// Core maintenance framework first, then the extension's own Maintenance base class.
require_once $IP . '/maintenance/Maintenance.php';
require_once __DIR__ . '/../includes/Maintenance/Maintenance.php';
43 | |
/**
 * Maintenance script that runs search queries and reports the results as JSON.
 *
 * Queries are streamed from STDIN through an OrderedStreamingForkController;
 * each query is handed to consume(), whose JSON string is returned to the
 * controller, which was constructed with STDOUT as its output stream.
 */
class RunSearch extends Maintenance {

	/**
	 * Prefix marking the value of --options as base64 encoded JSON.
	 */
	private const B64_PREFIX = 'B64://';

	/**
	 * @var string Base name of the indexes to query; assigned in execute().
	 */
	protected $indexBaseName;

	/**
	 * Declares the script description and the supported CLI options.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Run one or more searches against the specified cluster. ' .
			'search queries are read from stdin.' );
		$this->addOption( 'baseName', 'What basename to use for all indexes, ' .
			'defaults to wiki id', false, true );
		$this->addOption( 'type', 'What type of search to run, prefix, suggest, archive or full_text. ' .
			'defaults to full_text.', false, true );
		$this->addOption( 'options', 'A JSON object mapping from global variable to ' .
			'its test value', false, true );
		// The two halves of this help text used to be joined without a separating space.
		$this->addOption( 'fork', 'Fork multiple processes to run queries from. ' .
			'defaults to false.', false, true );
		$this->addOption( 'decode', 'urldecode() queries before running them', false, false );
		$this->addOption( 'explain', 'Include lucene explanation in the results', false, false );
		$this->addOption( 'limit', 'Set the max number of results returned by query (defaults to 10)', false, true );
		$this->addOption( 'i-know-what-im-doing', 'Allow setting unknown options from --options', false, false );
	}

	/**
	 * Runs the parent setup and then applies the overrides given via --options.
	 *
	 * @param SettingsBuilder $settingsBuilder
	 */
	public function finalSetup( SettingsBuilder $settingsBuilder ) {
		parent::finalSetup( $settingsBuilder );
		$this->applyGlobals();
	}

	/**
	 * Resolves the index base name and starts the fork controller that feeds
	 * queries from STDIN to consume().
	 *
	 * @return bool Always true
	 */
	public function execute() {
		$this->disablePoolCountersAndLogging();
		$this->indexBaseName = $this->getOption(
			'baseName',
			$this->getSearchConfig()->get( SearchConfig::INDEX_BASE_NAME )
		);

		// --fork is a string when given on the command line and the boolean default
		// otherwise. ctype_digit() only accepts strings (passing anything else is
		// deprecated since PHP 8.1), so check the type first. Anything that is not a
		// plain non-negative integer means "do not fork".
		$forks = $this->getOption( 'fork', false );
		$forks = ( is_string( $forks ) && ctype_digit( $forks ) ) ? (int)$forks : 0;

		$controller = new OrderedStreamingForkController( $forks, [ $this, 'consume' ], STDIN, STDOUT );
		$controller->start();

		return true;
	}

	/**
	 * To keep life sane this shouldn't be able to set completely arbitrary configuration, only
	 * the options that change search ranking. CirrusSearch has so many variables that enumerating
	 * them and maintaining extra lists of them would be a tedious process. Instead every key of
	 * the "config" section of extension.json, prefixed with "wg", is considered changeable.
	 *
	 * @return array<string,true> Changeable global variables represented as the keys for an array, for
	 *  use with isset().
	 * @throws \RuntimeException When extension.json cannot be read or decoded into an array
	 */
	private function loadChangeableConfigVars(): array {
		// WARNING: The autoloader isn't available yet, you can't use any mw/cirrus classes
		$raw = file_get_contents( __DIR__ . '/../extension.json' );
		// Only decode when the read succeeded; a read failure takes the same
		// error path as undecodable content.
		$config = ( $raw === false ) ? null : json_decode( $raw, true );
		if ( !is_array( $config ) ) {
			throw new \RuntimeException( 'Could not load extension.json for gathering the '
				. 'list of changeable config vars' );
		}

		// A manifest without a usable "config" section simply has no changeable variables.
		$definitions = $config['config'] ?? [];
		if ( !is_array( $definitions ) ) {
			$definitions = [];
		}

		$changeable = [];
		foreach ( $definitions as $key => $_ ) {
			$changeable['wg' . $key] = true;
		}
		return $changeable;
	}

	/**
	 * Applies global variables provided as the options CLI argument
	 * to override current settings.
	 *
	 * The argument is a JSON object, optionally base64 encoded and prefixed with
	 * "B64://". Keys containing a "." are treated as a path into an existing
	 * global array, other keys name the global variable to replace. Missing,
	 * empty or falsy input (false, null, {}, ...) leaves the configuration
	 * untouched. Input that is not valid JSON, or that decodes to a non-empty
	 * scalar, aborts the script instead of being silently ignored.
	 */
	protected function applyGlobals() {
		$optionsData = $this->getOption( 'options', null );
		if ( !is_string( $optionsData ) ) {
			return;
		}

		$prefixLength = strlen( self::B64_PREFIX );
		if ( strncmp( $optionsData, self::B64_PREFIX, $prefixLength ) === 0 ) {
			// The casts cover substr()/base64_decode() returning false.
			$optionsData = (string)base64_decode( (string)substr( $optionsData, $prefixLength ) );
		}
		if ( trim( $optionsData ) === '' ) {
			return;
		}

		$options = json_decode( $optionsData, true );
		if ( json_last_error() !== JSON_ERROR_NONE ) {
			// Running with unmodified settings after a typo in --options would
			// produce results for the wrong configuration, so fail loudly.
			$this->fatalError( "\nERROR: --options is not valid JSON: " . json_last_error_msg() . "\n" );
		}
		if ( !$options ) {
			// Valid but empty input has always meant "no overrides".
			return;
		}
		if ( !is_array( $options ) ) {
			$this->fatalError( "\nERROR: --options must be a JSON object mapping global variables "
				. "to their values\n" );
		}

		// Only read extension.json once we know there is something to validate.
		$changeable = $this->loadChangeableConfigVars();

		// TODO: This function needs to be called from Maintenance::finalSetup, otherwise the
		// config changes are applied too late to make it into various structures created on
		// initialization. This is particularly a problem with wikidata integration. Or at
		// least it was in Sept 2018. See ce3cf5fc52e4fade6e35fa38093180ae7397fee2.
		// Unfortunately, as of March 2020, default values from extension.json are *not*
		// available when Maintenance::finalSetup is called. This means you can only modify
		// explicitly configured values, anything that still has default values cannot be
		// changed.
		$forceChange = $this->getOption( 'i-know-what-im-doing', false );
		foreach ( $options as $key => $value ) {
			// JSON keys that look like integers are decoded as ints.
			$key = (string)$key;
			if ( strpos( $key, '.' ) !== false ) {
				$this->changeGlobalKeyPath( $key, $value, $changeable );
			} elseif ( $forceChange || isset( $changeable[$key] ) ) {
				// This is different from the keypath case above in that this can set
				// variables that haven't been loaded yet. In particular at this point
				// in the MW load process explicitly configured variables are
				// available, but defaults from extension.json have not yet been
				// loaded.
				$GLOBALS[$key] = $value;
			} else {
				$this->fatalError( "\nERROR: $key is not a globally changeable variable\n" );
			}
		}
	}

	/**
	 * Navigate a key path to change a global variable.
	 *
	 * The first path element names the global variable and must be changeable;
	 * every following element must be an existing key of the array reached so far.
	 *
	 * @param string $key the path, elements separated by "."
	 * @param mixed $value what we want to set it to
	 * @param array<string,true> $changeable the changeable variables
	 */
	private function changeGlobalKeyPath( string $key, $value, array $changeable ): void {
		$segments = explode( '.', $key );
		$root = array_shift( $segments );
		if ( !isset( $changeable[$root] ) ) {
			$this->fatalError( "\nERROR: $key is not a globally changeable variable\n" );
		}

		// Walk down by reference so the final assignment lands inside the global itself.
		$cur =& $GLOBALS[$root];
		foreach ( $segments as $segment ) {
			if ( !is_array( $cur ) || !array_key_exists( $segment, $cur ) ) {
				$this->fatalError( "\nERROR: $key is not a valid global variable path\n" );
			}
			$cur =& $cur[$segment];
		}
		$cur = $value;
	}

	/**
	 * Transform the search request into a JSON string representing the
	 * search result.
	 *
	 * The JSON object always contains the (possibly url-decoded) "query". On
	 * success the keys produced by the matching process*() method are added, on
	 * failure an "error" key holds the status message.
	 *
	 * @param string $query
	 * @return string JSON object
	 * @throws \RuntimeException When the search returns a value of an unexpected type
	 */
	public function consume( $query ) {
		if ( $this->getOption( 'decode' ) ) {
			$query = urldecode( $query );
		}
		$data = [ 'query' => $query ];
		$status = $this->searchFor( $query );
		if ( !$status->isOK() ) {
			$data['error'] = $status->getMessage()->text();
		} else {
			$value = $status->getValue();
			if ( $value instanceof IResultWrapper ) {
				// Archive search results
				$data += $this->processArchiveResult( $value );
			} elseif ( $value instanceof CirrusSearchResultSet ) {
				$data += $this->processResultSet( $value, $query );
			} elseif ( $value instanceof SearchSuggestionSet ) {
				// these are suggestion results
				$data += $this->processSuggestionSet( $value );
			} else {
				throw new \RuntimeException(
					'Unknown result type: '
					. ( is_object( $value ) ? get_class( $value ) : gettype( $value ) )
				);
			}
		}

		// urldecode() can yield byte sequences that are not valid UTF-8, for which a plain
		// json_encode() returns false. Substitute such bytes and degrade unencodable values
		// to null so a JSON string is still produced for the query.
		return json_encode( $data, JSON_INVALID_UTF8_SUBSTITUTE | JSON_PARTIAL_OUTPUT_ON_ERROR );
	}

	/**
	 * Extract data from a search result set.
	 *
	 * @param CirrusSearchResultSet $value
	 * @param string $query Not used by this implementation
	 * @return array Map with "totalHits" and the list of "rows"
	 */
	protected function processResultSet( CirrusSearchResultSet $value, $query ) {
		// these are prefix or full text results
		$rows = [];
		foreach ( $value as $result ) {
			/** @var \CirrusSearch\Search\CirrusSearchResult $result */
			$row = [
				// use getDocId() rather than asking the title to allow this script
				// to work when a production index has been imported to a test es instance
				'docId' => $result->getDocId(),
				'title' => $result->getTitle()->getPrefixedText(),
				'score' => $result->getScore(),
				'snippets' => [
					'text' => $result->getTextSnippet(),
					'title' => $result->getTitleSnippet(),
					'redirect' => $result->getRedirectSnippet(),
					'section' => $result->getSectionSnippet(),
					'category' => $result->getCategorySnippet(),
				],
				'explanation' => $result->getExplanation(),
				'extra' => $result->getExtensionData(),
			];
			// Prefer the file attached to the result, otherwise look one up by title.
			$img = $result->getFile() ?: MediaWikiServices::getInstance()->getRepoGroup()
				->findFile( $result->getTitle() );
			if ( $img ) {
				$thumb = $img->transform( [ 'width' => 120, 'height' => 120 ] );
				if ( $thumb ) {
					$row['thumb_url'] = $thumb->getUrl();
				}
			}
			$rows[] = $row;
		}
		return [
			'totalHits' => $value->getTotalHits(),
			'rows' => $rows,
		];
	}

	/**
	 * Extract data from a search suggestions set.
	 *
	 * @param SearchSuggestionSet $value
	 * @return array Map with "totalHits" and the list of "rows"
	 */
	protected function processSuggestionSet( SearchSuggestionSet $value ) {
		$rows = [];
		foreach ( $value->getSuggestions() as $suggestion ) {
			$rows[] = [
				'pageId' => $suggestion->getSuggestedTitleID(),
				'title' => $suggestion->getSuggestedTitle()->getPrefixedText(),
				'snippets' => [],
			];
		}
		return [
			'totalHits' => $value->getSize(),
			'rows' => $rows,
		];
	}

	/**
	 * Extract data from archive search results.
	 *
	 * @param IResultWrapper $value
	 * @return array Map with "totalHits" and the list of "rows"
	 */
	protected function processArchiveResult( IResultWrapper $value ) {
		$rows = [];
		foreach ( $value as $row ) {
			$rows[] = [
				'title' => $row->ar_title,
				'namespace' => $row->ar_namespace,
				'count' => $row->count,
			];
		}
		return [
			'totalHits' => $value->numRows(),
			'rows' => $rows,
		];
	}

	/**
	 * Search for term in the archive.
	 *
	 * @param string $query
	 * @return Status<IResultWrapper>
	 */
	protected function searchArchive( $query ) {
		$result = PageArchive::listPagesBySearch( $query );
		return Status::newGood( $result );
	}

	/**
	 * Transform the search request into a Status object representing the
	 * search result. Varies based on CLI input argument `type`.
	 *
	 * @param string $query
	 * @return Status<\CirrusSearch\Search\CirrusSearchResultSet|SearchSuggestionSet|IResultWrapper>
	 */
	protected function searchFor( $query ) {
		$searchType = $this->getOption( 'type', 'full_text' );

		if ( $searchType === 'archive' ) {
			// Archive has its own engine so go directly there
			return $this->searchArchive( $query );
		}

		$limit = $this->getOption( 'limit', 10 );
		$options = CirrusDebugOptions::forRelevanceTesting(
			$this->getOption( 'explain', false ) ? 'raw' : null
		);

		// Inherit the wiki's configuration, overriding only the index base name.
		$config = new HashSearchConfig( [ SearchConfig::INDEX_BASE_NAME => $this->indexBaseName ],
			[ HashSearchConfig::FLAG_INHERIT ] );
		$engine = new CirrusSearch( $config, $options );
		// Keys of the entries whose value compares equal to true.
		$namespaces = array_keys( $engine->getConfig()->get( 'NamespacesToBeSearchedDefault' ), true );
		$engine->setNamespaces( $namespaces );

		$engine->setConnection( $this->getConnection() );
		$engine->setLimitOffset( $limit );

		switch ( $searchType ) {
			case 'full_text':
				// @todo pass through $this->getConnection() ?
				$result = $engine->searchText( $query );
				if ( $result instanceof Status ) {
					return $result;
				} else {
					return Status::newGood( $result );
				}

			case 'prefix':
				$titles = $engine->defaultPrefixSearch( $query );
				$resultSet = SearchSuggestionSet::fromTitles( $titles );
				return Status::newGood( $resultSet );

			case 'suggest':
				$result = $engine->completionSearch( $query );
				if ( $result instanceof Status ) {
					return $result;
				} else {
					return Status::newGood( $result );
				}

			default:
				$this->fatalError( "\nERROR: Unknown search type $searchType\n" );
		}
	}
}
358 | |
// Name the maintenance class for the runner included below; the include is
// keyed on the RUN_MAINTENANCE_IF_MAIN constant.
$maintClass = \CirrusSearch\Maintenance\RunSearch::class;
require_once RUN_MAINTENANCE_IF_MAIN;