Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
91.67% |
44 / 48 |
|
0.00% |
0 / 1 |
CRAP | |
0.00% |
0 / 1 |
BacklinkJobUtils | |
93.62% |
44 / 47 |
|
0.00% |
0 / 1 |
10.03 | |
0.00% |
0 / 1 |
partitionBacklinkJob | |
93.62% |
44 / 47 |
|
0.00% |
0 / 1 |
10.03 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file |
19 | */ |
20 | |
21 | namespace MediaWiki\JobQueue\Utils; |
22 | |
23 | use MediaWiki\JobQueue\Job; |
24 | use MediaWiki\MediaWikiServices; |
25 | use MediaWiki\Page\PageIdentity; |
26 | |
27 | /** |
28 | * Helper for a Job that updates links to a given page title. |
29 | * |
30 | * When an asset changes, a base job can be inserted to update all assets that depend on it. |
31 | * The base job splits into per-title "leaf" jobs and a "remnant" job to handle the remaining |
32 | * range of backlinks. This recurs until the remnant job's backlink range is small enough that |
33 | * only leaf jobs are created from it. |
34 | * |
35 | * For example, if templates A and B are edited (at the same time) the queue will have: |
36 | * (A base, B base) |
37 | * When these jobs run, the queue will have per-title and remnant partition jobs: |
38 | * (titleX,titleY,titleZ,...,A remnant,titleM,titleN,titleO,...,B remnant) |
39 | * |
40 | * This works best when the queue is FIFO, for several reasons: |
41 | * - a) Since the remnant jobs are enqueued after the leaf jobs, the slower leaf jobs have to |
42 | * get popped prior to the fast remnant jobs. This avoids flooding the queue with leaf jobs |
43 | * for every single backlink of widely used assets (which can be millions). |
44 | * - b) Other jobs going in the queue still get a chance to run after a widely used asset changes. |
45 | * This is due to the large remnant job pushing to the end of the queue with each division. |
46 | * |
47 | * The size of the queues used in this manner depend on the number of assets changes and the |
48 | * number of workers. Also, with FIFO-per-partition queues, the queue size can be somewhat larger, |
49 | * depending on the number of queue partitions. |
50 | * |
51 | * @since 1.23 |
52 | * @ingroup JobQueue |
53 | */ |
54 | class BacklinkJobUtils { |
55 | /** |
56 | * Break down $job into approximately ($bSize/$cSize) leaf jobs and a single partition |
57 | * job that covers the remaining backlink range (if needed). Jobs for the first $bSize |
58 | * titles are collated ($cSize per job) into leaf jobs to do actual work. All the |
59 | * resulting jobs are of the same class as $job. No partition job is returned if the |
60 | * range covered by $job was less than $bSize, as the leaf jobs have full coverage. |
61 | * |
62 | * The leaf jobs have the 'pages' param set to a (<page ID>:(<namespace>,<DB key>),...) |
63 | * map so that the run() function knows what pages to act on. The leaf jobs will keep |
64 | * the same job title as the parent job (e.g. $job). |
65 | * |
66 | * The partition jobs have the 'range' parameter set to a map of the format |
67 | * (start:<integer>, end:<integer>, batchSize:<integer>, subranges:((<start>,<end>),...)), |
68 | * the 'table' parameter set to that of $job, and the 'recursive' parameter set to true. |
69 | * This method can be called on the resulting job to repeat the process again. |
70 | * |
71 | * The job provided ($job) must have the 'recursive' parameter set to true and the 'table' |
72 | * parameter must be set to a backlink table. The job title will be used as the title to |
73 | * find backlinks for. Any 'range' parameter must follow the same format as mentioned above. |
74 | * This should be managed by recursive calls to this method. |
75 | * |
76 | * The first jobs return are always the leaf jobs. This lets the caller use push() to |
77 | * put them directly into the queue and works well if the queue is FIFO. In such a queue, |
78 | * the leaf jobs have to get finished first before anything can resolve the next partition |
79 | * job, which keeps the queue very small. |
80 | * |
81 | * $opts includes: |
82 | * - params : extra job parameters to include in each job |
83 | * |
84 | * @param Job $job |
85 | * @param int $bSize BacklinkCache partition size; usually $wgUpdateRowsPerJob |
86 | * @param int $cSize Max titles per leaf job; Usually 1 or a modest value |
87 | * @param array $opts Optional parameter map |
88 | * @return Job[] |
89 | */ |
90 | public static function partitionBacklinkJob( Job $job, $bSize, $cSize, $opts = [] ) { |
91 | $class = get_class( $job ); |
92 | $title = $job->getTitle(); |
93 | $params = $job->getParams(); |
94 | |
95 | $backlinkCache = MediaWikiServices::getInstance()->getBacklinkCacheFactory() |
96 | ->getBacklinkCache( $title ); |
97 | if ( isset( $params['pages'] ) || empty( $params['recursive'] ) ) { |
98 | // this is a leaf node |
99 | $ranges = []; |
100 | $realBSize = 0; |
101 | wfWarn( __METHOD__ . " called on {$job->getType()} leaf job (explosive recursion)." ); |
102 | } elseif ( isset( $params['range'] ) ) { |
103 | // This is a range job to trigger the insertion of partitioned/title jobs... |
104 | $ranges = $params['range']['subranges']; |
105 | $realBSize = $params['range']['batchSize']; |
106 | } else { |
107 | // This is a base job to trigger the insertion of partitioned jobs... |
108 | $ranges = $backlinkCache->partition( $params['table'], $bSize ); |
109 | $realBSize = $bSize; |
110 | } |
111 | |
112 | $extraParams = $opts['params'] ?? []; |
113 | |
114 | $jobs = []; |
115 | // Combine the first range (of size $bSize) backlinks into leaf jobs |
116 | if ( isset( $ranges[0] ) ) { |
117 | $start = $ranges[0][0]; |
118 | $end = isset( $ranges[1] ) ? $ranges[1][0] - 1 : false; |
119 | |
120 | $iter = $backlinkCache->getLinkPages( $params['table'], $start, $end ); |
121 | $pageSources = iterator_to_array( $iter ); |
122 | /** @var PageIdentity[] $pageBatch */ |
123 | foreach ( array_chunk( $pageSources, $cSize ) as $pageBatch ) { |
124 | $pages = []; |
125 | foreach ( $pageBatch as $page ) { |
126 | $pages[$page->getId()] = [ $page->getNamespace(), $page->getDBkey() ]; |
127 | } |
128 | $jobs[] = new $class( |
129 | $title, // maintain parent job title |
130 | [ 'pages' => $pages ] + $extraParams |
131 | ); |
132 | } |
133 | } |
134 | // Take all of the remaining ranges and build a partition job from it |
135 | if ( isset( $ranges[1] ) ) { |
136 | $jobs[] = new $class( |
137 | $title, // maintain parent job title |
138 | [ |
139 | 'recursive' => true, |
140 | 'table' => $params['table'], |
141 | 'range' => [ |
142 | 'start' => $ranges[1][0], |
143 | 'end' => $ranges[count( $ranges ) - 1][1], |
144 | 'batchSize' => $realBSize, |
145 | 'subranges' => array_slice( $ranges, 1 ) |
146 | ], |
147 | // Track how many times the base job divided for debugging |
148 | 'division' => isset( $params['division'] ) |
149 | ? ( $params['division'] + 1 ) |
150 | : 1 |
151 | ] + $extraParams |
152 | ); |
153 | } |
154 | |
155 | return $jobs; |
156 | } |
157 | } |
158 | |
159 | /** @deprecated class alias since 1.44 */ |
160 | class_alias( BacklinkJobUtils::class, 'BacklinkJobUtils' ); |