Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
68.60% |
59 / 86 |
|
50.00% |
4 / 8 |
CRAP | |
0.00% |
0 / 1 |
OtherIndexesUpdater | |
68.60% |
59 / 86 |
|
50.00% |
4 / 8 |
39.37 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
buildOtherIndexesUpdater | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getExternalIndexes | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
getExtraIndexesForNamespaces | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
5 | |||
updateOtherIndex | |
92.68% |
38 / 41 |
|
0.00% |
0 / 1 |
9.03 | |||
runUpdates | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
6 | |||
logFailure | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
6 | |||
queryForTitle | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace CirrusSearch; |
4 | |
5 | use Elastica\Multi\ResultSet; |
6 | use Elastica\Multi\Search as MultiSearch; |
7 | use MediaWiki\Logger\LoggerFactory; |
8 | use MediaWiki\Title\Title; |
9 | |
10 | /** |
11 | * Tracks whether a Title is known on other indexes. |
12 | * |
13 | * This program is free software; you can redistribute it and/or modify |
14 | * it under the terms of the GNU General Public License as published by |
15 | * the Free Software Foundation; either version 2 of the License, or |
16 | * (at your option) any later version. |
17 | * |
18 | * This program is distributed in the hope that it will be useful, |
19 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
20 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
21 | * GNU General Public License for more details. |
22 | * |
23 | * You should have received a copy of the GNU General Public License along |
24 | * with this program; if not, write to the Free Software Foundation, Inc., |
25 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
26 | * http://www.gnu.org/copyleft/gpl.html |
27 | */ |
class OtherIndexesUpdater extends Updater {
	/** @var string Identifier of the local site; forwarded to the write jobs */
	private $localSite;

	/**
	 * @param Connection $readConnection Handed to the parent Updater constructor
	 * @param string|null $writeToClusterName Handed to the parent Updater constructor
	 * @param string $localSite Local site we're tracking
	 */
	public function __construct( Connection $readConnection, $writeToClusterName, $localSite ) {
		parent::__construct( $readConnection, $writeToClusterName );
		$this->localSite = $localSite;
	}

	/**
	 * Factory: obtain a pooled Connection for $cluster and wrap it in an updater.
	 *
	 * @param SearchConfig $config
	 * @param string|null $cluster Used both to pick the connection and as the
	 *  write cluster name of the new updater
	 * @param string $localSite
	 * @return OtherIndexesUpdater
	 */
	public static function buildOtherIndexesUpdater( SearchConfig $config, $cluster, $localSite ): OtherIndexesUpdater {
		return new self( Connection::getPool( $config, $cluster ), $cluster, $localSite );
	}

	/**
	 * List the external indexes configured (CirrusSearchExtraIndexes) for the
	 * namespace that $title lives in.
	 *
	 * @param SearchConfig $config
	 * @param Title $title
	 * @param string|null $cluster cluster (as in CirrusSearchWriteClusters) to filter on;
	 *  note that the current body does not consult this value
	 * @return ExternalIndex[] One entry per configured index name, empty when
	 *  nothing is configured for the namespace
	 */
	public static function getExternalIndexes( SearchConfig $config, Title $title, $cluster = null ) {
		$namespace = $title->getNamespace();
		$perNamespace = $config->get( 'CirrusSearchExtraIndexes' );

		$externalIndexes = [];
		foreach ( $perNamespace[$namespace] ?? [] as $indexName ) {
			$externalIndexes[] = new ExternalIndex( $config, $indexName );
		}

		return $externalIndexes;
	}

	/**
	 * List the external indexes configured (CirrusSearchExtraIndexes) for any
	 * of the given namespaces.
	 *
	 * @param SearchConfig $config
	 * @param int[] $namespaces An array of namespace ids
	 * @return ExternalIndex[] Indexes in configuration order; an index name
	 *  configured for several requested namespaces appears once per namespace
	 */
	public static function getExtraIndexesForNamespaces( SearchConfig $config, array $namespaces ) {
		$configured = $config->get( 'CirrusSearchExtraIndexes' ) ?: [];

		$matched = [];
		foreach ( $configured as $namespace => $indexes ) {
			if ( in_array( $namespace, $namespaces ) ) {
				foreach ( $indexes as $indexName ) {
					$matched[] = new ExternalIndex( $config, $indexName );
				}
			}
		}

		return $matched;
	}

	/**
	 * Update the indexes of other wikis that also store information about $titles.
	 *
	 * One search per (title, external index) pair is batched into a single
	 * multi-search; every pair that yields a hit contributes one action, and
	 * the collected actions are then handed to runUpdates().
	 *
	 * @param Title[] $titles array of titles in other indexes to update
	 */
	public function updateOtherIndex( $titles ) {
		$noopEnabled = $this->connection->getConfig()
			->getElement( 'CirrusSearchWikimediaExtraPlugin', 'super_detect_noop' );
		if ( !$noopEnabled ) {
			$this->logFailure( $titles, 'super_detect_noop plugin not enabled' );
			return;
		}

		// Filled in (by reference) by the collector closures below, keyed by
		// the object hash of the ExternalIndex instance.
		$updates = [];

		// One search and one collector per (title, external index) pair; both
		// lists share the same ordering so responses can be matched by position.
		$lookup = new MultiSearch( $this->connection->getClient() );
		$collectors = [];
		$readClusterName = $this->connection->getConfig()->getClusterAssignment()->getCrossClusterName();
		foreach ( $titles as $title ) {
			$externalIndexes = self::getExternalIndexes( $this->connection->getConfig(), $title );
			foreach ( $externalIndexes as $otherIndex ) {
				$searchIndex = $otherIndex->getSearchIndex( $readClusterName );
				$query = $this->queryForTitle( $title );
				$index = $this->connection->getIndex( $searchIndex );
				$lookup->addSearch( $index->createSearch( $query ) );

				$collectors[] = static function ( $docId ) use ( $otherIndex, &$updates, $title ) {
					// Keep the ExternalIndex object next to its actions so
					// runUpdates() can read the index names from it.
					$key = spl_object_hash( $otherIndex );
					if ( !isset( $updates[$key] ) ) {
						$updates[$key] = [ $otherIndex, [] ];
					}
					$updates[$key][1][] = [
						'docId' => $docId,
						'ns' => $title->getNamespace(),
						'dbKey' => $title->getDBkey(),
					];
				};
			}
		}

		$lookupCount = count( $collectors );
		if ( $lookupCount === 0 ) {
			// No other indexes to check.
			return;
		}

		// Resolve the document ids in one round trip.
		$status = $this->runMSearch(
			$lookup,
			new MultiSearchRequestLog(
				$this->connection->getClient(),
				'searching for {numIds} ids in other indexes',
				'other_idx_lookup',
				[ 'numIds' => $lookupCount ]
			)
		);
		if ( !$status->isGood() ) {
			return;
		}

		/** @var ResultSet $responses */
		$responses = $status->getValue();
		foreach ( $collectors as $position => $collect ) {
			$hits = $responses[$position]->getResults();
			if ( count( $hits ) === 0 ) {
				// Title not present in that index: nothing to update there.
				continue;
			}
			$collect( $hits[0]->getId() );
		}

		// Only the first title of the batch is passed along.
		$this->runUpdates( reset( $titles ), $updates );
	}

	/**
	 * Queue the write jobs for the collected updates.
	 *
	 * Jobs are split per index because the external indexes may be configured
	 * to write to different clusters. This keeps writes isolated between
	 * clusters so one slow cluster doesn't drag down the others.
	 *
	 * @param Title $title Not read by this implementation
	 * @param array $updates List of [ ExternalIndex, action[] ] pairs as built
	 *  by updateOtherIndex()
	 * @return void
	 */
	protected function runUpdates( Title $title, array $updates ): void {
		foreach ( $updates as [ $otherIndex, $actions ] ) {
			$buildJob = function ( array $chunk, ClusterSettings $cluster ) use ( $otherIndex ) {
				return Job\ElasticaWrite::build(
					$cluster,
					UpdateGroup::PAGE,
					'sendOtherIndexUpdates',
					// Second element: name of the index to write to on
					// whatever cluster is connected to.
					[ $this->localSite, $otherIndex->getIndexName(), $chunk ],
					// Index name and, potentially, a replica group identifier.
					// Needed to create an appropriate ExternalIndex instance
					// in the job.
					[ 'external-index' => $otherIndex->getGroupAndIndexName() ],
				);
			};

			$this->pushElasticaWriteJobs( UpdateGroup::PAGE, $actions, $buildJob );
		}
	}

	/**
	 * Record, on the CirrusSearchChangeFailed log channel, that the other-index
	 * update was not performed for the given titles.
	 *
	 * @param Title[] $titles
	 * @param string $reason Optional explanation, appended in parentheses when non-empty
	 */
	private function logFailure( array $titles, $reason = '' ) {
		$suffix = $reason ? " ($reason)" : $reason;
		$articleIDs = array_map(
			static function ( Title $title ) {
				return $title->getArticleID();
			},
			$titles
		);

		$logger = LoggerFactory::getInstance( 'CirrusSearchChangeFailed' );
		$logger->info( "Other Index$suffix for article ids: " . implode( ',', $articleIDs ) );
	}

	/**
	 * Build the query that looks up the document for $title: an exact match
	 * on title text and namespace, limited to a single hit.
	 *
	 * @param Title $title
	 * @return \Elastica\Query
	 */
	private function queryForTitle( Title $title ) {
		// Note that we need to use the keyword indexing of title so the analyzer gets out of the way.
		$titleFilter = new \Elastica\Query\Term( [ 'title.keyword' => $title->getText() ] );
		$namespaceFilter = new \Elastica\Query\Term( [ 'namespace' => $title->getNamespace() ] );

		$filters = new \Elastica\Query\BoolQuery();
		$filters->addFilter( $titleFilter );
		$filters->addFilter( $namespaceFilter );

		$lookupQuery = new \Elastica\Query( $filters );
		// We only need the _id so don't load the _source
		$lookupQuery->setStoredFields( [] );
		$lookupQuery->setSize( 1 );

		return $lookupQuery;
	}

}