Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
10.91% |
6 / 55 |
|
10.00% |
1 / 10 |
CRAP | |
0.00% |
0 / 1 |
LinksUpdate | |
10.91% |
6 / 55 |
|
10.00% |
1 / 10 |
274.27 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
newPageChangeUpdate | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
newPastRevisionVisibilityChange | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
newPageRefreshUpdate | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
newSaneitizerUpdate | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
doJob | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
12 | |||
saneitize | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
update | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
queueIncomingLinksJobs | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
20 | |||
isPrioritized | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Job; |
4 | |
5 | use CirrusSearch\Updater; |
6 | use MediaWiki\MediaWikiServices; |
7 | use MediaWiki\Revision\RevisionRecord; |
8 | use MediaWiki\Title\Title; |
9 | use MediaWiki\Utils\MWTimestamp; |
10 | |
11 | /** |
12 | * Performs the appropriate updates to Elasticsearch after a LinksUpdate is |
13 | * completed. The page itself is updated first then a second copy of this job |
14 | * is queued to update linked articles if any links change. The job can be |
15 | * 'prioritized' via the 'prioritize' parameter which will switch it to a |
16 | * different queue than the non-prioritized jobs. Prioritized jobs will never |
17 | * be deduplicated with non-prioritized jobs which is good because we can't |
18 | * control which job is removed during deduplication. In our case it'd only be |
19 | * ok to remove the non-prioritized version. |
20 | * |
21 | * This program is free software; you can redistribute it and/or modify |
22 | * it under the terms of the GNU General Public License as published by |
23 | * the Free Software Foundation; either version 2 of the License, or |
24 | * (at your option) any later version. |
25 | * |
26 | * This program is distributed in the hope that it will be useful, |
27 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
29 | * GNU General Public License for more details. |
30 | * |
31 | * You should have received a copy of the GNU General Public License along |
32 | * with this program; if not, write to the Free Software Foundation, Inc., |
33 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
34 | * http://www.gnu.org/copyleft/gpl.html |
35 | */ |
class LinksUpdate extends CirrusTitleJob {
	/**
	 * Name of the job parameter that marks the job as "prioritized".
	 */
	private const PRIORITIZE = 'prioritize';

	/**
	 * @param Title $title
	 * @param array $params job parameters; a truthy 'prioritize' entry makes
	 *  the job a prioritized one
	 */
	public function __construct( Title $title, array $params ) {
		parent::__construct( $title, $params );

		if ( $this->isPrioritized() ) {
			$this->command .= 'Prioritized';
		}
		// Note that we have to keep the prioritized param or else when the job
		// is loaded it'll load under a different name/command/type which would
		// be confusing.
	}

	/**
	 * Prepare a page update for when this page is directly updated (new revision/delete/restore).
	 *
	 * The root event time is the revision timestamp when one is available and
	 * the current time otherwise. Entries already present in $params take
	 * precedence over the defaults set here.
	 *
	 * @param Title $title
	 * @param RevisionRecord|null $revisionRecord
	 * @param array $params
	 * @return LinksUpdate
	 */
	public static function newPageChangeUpdate( Title $title, ?RevisionRecord $revisionRecord, array $params ): LinksUpdate {
		// Read the revision timestamp only once; it is null when there is no
		// revision or the revision carries no timestamp.
		$revisionTimestamp = $revisionRecord !== null ? $revisionRecord->getTimestamp() : null;
		$ts = $revisionTimestamp !== null
			? (int)MWTimestamp::convert( TS_UNIX, $revisionTimestamp )
			: MWTimestamp::time();

		// "+=" keeps any value the caller already provided for these keys.
		$params += [
			self::PRIORITIZE => true,
			self::UPDATE_KIND => self::PAGE_CHANGE,
			self::ROOT_EVENT_TIME => $ts,
		];

		return new self( $title, $params );
	}

	/**
	 * Prepare a cautionary update of a page that had some of its revision's visibility changed.
	 * (Theoretically not required because old revisions should not be part of the index)
	 *
	 * @param Title $title
	 * @return LinksUpdate
	 */
	public static function newPastRevisionVisibilityChange( Title $title ): LinksUpdate {
		return new self( $title, [
			self::PRIORITIZE => true,
			self::UPDATE_KIND => self::VISIBILITY_CHANGE,
			self::ROOT_EVENT_TIME => MWTimestamp::time(),
		] );
	}

	/**
	 * Prepare a page update for when the rendered output of the page might have changed due to a
	 * change not directly related to this page (e.g. template update).
	 *
	 * Entries already present in $params take precedence over the defaults set here.
	 *
	 * @param Title $title
	 * @param array $params
	 * @return LinksUpdate
	 */
	public static function newPageRefreshUpdate( Title $title, array $params ): LinksUpdate {
		// "+=" keeps any value the caller already provided for these keys.
		$params += [
			self::PRIORITIZE => false,
			self::UPDATE_KIND => self::PAGE_REFRESH,
			self::ROOT_EVENT_TIME => MWTimestamp::time(),
		];

		return new self( $title, $params );
	}

	/**
	 * New change emitted from the saneitizer.
	 *
	 * @param Title $title
	 * @param string|null $cluster optional target cluster, null for all clusters
	 * @return LinksUpdate
	 */
	public static function newSaneitizerUpdate( Title $title, ?string $cluster ): LinksUpdate {
		return new self( $title, [
			self::PRIORITIZE => false,
			self::UPDATE_KIND => self::SANEITIZER,
			self::ROOT_EVENT_TIME => MWTimestamp::time(),
			self::CLUSTER => $cluster
		] );
	}

	/**
	 * Update the page of this job, then queue the incoming links count jobs
	 * when 'CirrusSearchEnableIncomingLinkCounting' is enabled.
	 *
	 * @return bool always true
	 */
	protected function doJob() {
		$updater = Updater::build( $this->getSearchConfig(), $this->params['cluster'] ?? null );

		// Both branches end up in update(); the saneitizer gets its own frame
		// so that it can be told apart in flame graphs.
		if ( $this->params[self::UPDATE_KIND] === self::SANEITIZER ) {
			$this->saneitize( $updater );
		} else {
			$this->update( $updater );
		}

		if ( $this->getSearchConfig()->get( 'CirrusSearchEnableIncomingLinkCounting' ) ) {
			$this->queueIncomingLinksJobs();
		}

		return true;
	}

	/**
	 * Indirection doing technically nothing but help measure the impact of these jobs via flame graphs.
	 *
	 * @param Updater $updater
	 * @return void
	 */
	private function saneitize( Updater $updater ): void {
		$this->update( $updater );
	}

	/**
	 * Ask the updater to update the title of this job, forwarding the update
	 * kind and the root event time stored in the job parameters.
	 *
	 * @param Updater $updater
	 * @return void
	 */
	private function update( Updater $updater ): void {
		$updater->updateFromTitle(
			$this->title,
			$this->params[self::UPDATE_KIND],
			$this->params[self::ROOT_EVENT_TIME]
		);
	}

	/**
	 * Queue IncomingLinkCount jobs when pages are newly linked or unlinked.
	 *
	 * The targets are the DB keys found in the 'addedLinks' and 'removedLinks'
	 * job parameters; keys that do not resolve to a title that can exist are
	 * skipped.
	 */
	private function queueIncomingLinksJobs() {
		$titleKeys = array_merge(
			$this->params['addedLinks'] ?? [],
			$this->params['removedLinks'] ?? []
		);
		if ( $titleKeys === [] ) {
			// No link changed: nothing to queue, skip the config and service lookups.
			return;
		}

		// If possible, delay the job execution by a few seconds so Elasticsearch
		// can refresh to contain what we just sent it. The delay should be long
		// enough for Elasticsearch to complete the refresh cycle, which normally
		// takes wgCirrusSearchRefreshInterval seconds but we double it and add
		// one just in case.
		$delay = 2 * $this->getSearchConfig()->get( 'CirrusSearchRefreshInterval' ) + 1;
		// Same guarded read as in doJob(): the param may be absent.
		$cluster = $this->params['cluster'] ?? null;
		$jobQueue = MediaWikiServices::getInstance()->getJobQueueGroup();

		$jobs = [];
		foreach ( $titleKeys as $titleKey ) {
			$title = Title::newFromDBkey( $titleKey );
			if ( !$title || !$title->canExist() ) {
				continue;
			}
			$jobs[] = new IncomingLinkCount( $title, [
				'cluster' => $cluster,
			] + self::buildJobDelayOptions( IncomingLinkCount::class, $delay, $jobQueue ) );
		}

		if ( $jobs !== [] ) {
			$jobQueue->push( $jobs );
		}
	}

	/**
	 * @return bool Is this job prioritized?
	 */
	public function isPrioritized() {
		// A missing or null param counts as "not prioritized".
		return (bool)( $this->params[self::PRIORITIZE] ?? false );
	}
}