Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 148 |
|
0.00% |
0 / 14 |
CRAP | |
0.00% |
0 / 1 |
Proposals | |
0.00% |
0 / 148 |
|
0.00% |
0 / 14 |
4692 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
20 | |||
getLinkList | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setCollection | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getProposals | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
hasBans | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
updateLinkList | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
addCollectionArticles | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
90 | |||
deleteUnusedArticles | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
resolveRedirects | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getWeightedLinks | |
0.00% |
0 / 61 |
|
0.00% |
0 / 1 |
506 | |||
getPropList | |
0.00% |
0 / 30 |
|
0.00% |
0 / 1 |
132 | |||
searchEntry | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
checkLink | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
42 | |||
getPropCount | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * Collection Extension for MediaWiki |
4 | * |
5 | * Copyright (C) 2008-2009, PediaPress GmbH |
6 | * |
7 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License as published by |
9 | * the Free Software Foundation; either version 2 of the License, or |
10 | * (at your option) any later version. |
11 | * |
12 | * This program is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | * GNU General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU General Public License along |
18 | * with this program; if not, write to the Free Software Foundation, Inc., |
19 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
20 | * http://www.gnu.org/copyleft/gpl.html |
21 | */ |
22 | |
23 | namespace MediaWiki\Extension\Collection; |
24 | |
25 | use Article; |
26 | use MediaWiki\Title\Title; |
27 | use TextContent; |
28 | |
/**
 * Calculates article proposals (suggestions) for a collection (book).
 *
 * It works on three lists:
 * - the collection, whose 'items' are the book members
 * - the list of banned articles, which are never proposed
 * - the link list, which holds the weighted outgoing links of every
 *   book article that has been processed so far
 *
 * The proposal list can be accessed with the public method getProposals().
 *
 * The link list, with the book articles as first dimension and information
 * about the outgoing links of each article as second dimension, can be
 * accessed with the method getLinkList().
 */
class Proposals {

	/**
	 * The collection; its 'items' entry lists the book members.
	 *
	 * @var array
	 */
	private $mColl;

	/**
	 * The proposals computed by the last call of getPropList().
	 *
	 * @var array[]
	 */
	private $mPropList;

	/**
	 * One entry per processed book article, each with the keys
	 * 'name' (article name) and 'links' (article name => weight).
	 *
	 * @var array[]
	 */
	private $mLinkList;

	/**
	 * Names of the articles that must never be proposed.
	 *
	 * @var string[]
	 */
	private $mBanList;

	/**
	 * Any argument that is not an array is replaced by an empty array.
	 *
	 * @param array $coll the collection
	 * @param string[] $ban the list of the banned articles
	 * @param array[] $props the link list to start from, in the format
	 *  returned by getLinkList()
	 */
	public function __construct( $coll, $ban, $props ) {
		$this->mPropList = [];
		$this->mColl = is_array( $coll ) ? $coll : [];
		$this->mBanList = is_array( $ban ) ? $ban : [];
		$this->mLinkList = is_array( $props ) ? $props : [];
	}

	/**
	 * @return array[] the link list, see $mLinkList for the format
	 */
	public function getLinkList() {
		return $this->mLinkList;
	}

	/**
	 * Replace the collection the proposals are calculated for.
	 *
	 * @param array $collection
	 */
	public function setCollection( array $collection ) {
		$this->mColl = $collection;
	}

	/**
	 * Calculate the new proposals and return them
	 *
	 * @param int $num number of proposals to be returned;
	 *  0 or less means that all proposals will be returned,
	 *  which is also the default
	 * @param bool $doUpdate when true, the link list will be
	 *  updated before calculating the proposals;
	 *  default is true
	 * @return array[] a 2-dimensional array that contains the proposals,
	 *  best proposal first. The first dimension is numeric, the second
	 *  contains 2 entries:
	 *  - 'name': the name of a proposed article
	 *  - 'val' : the sum of the weights this article got from the
	 *            book articles linking to it; the higher the
	 *            more this article is proposed
	 */
	public function getProposals( $num = 0, $doUpdate = true ) {
		if ( $doUpdate ) {
			$this->updateLinkList();
		}

		$this->getPropList();

		return $num > 0
			? array_slice( $this->mPropList, 0, $num )
			: $this->mPropList;
	}

	/**
	 * @return bool true if at least one article is banned
	 */
	public function hasBans() {
		return count( $this->mBanList ) > 0;
	}

	/**
	 * Bring $mLinkList in sync with the collection
	 */
	private function updateLinkList(): void {
		$this->addCollectionArticles();
		$this->deleteUnusedArticles();
	}

	/**
	 * Make sure all articles from the book are in $mLinkList
	 *
	 * Nothing is added when the collection is empty or has more items
	 * than $wgCollectionSuggestThreshhold.
	 */
	private function addCollectionArticles(): void {
		global $wgCollectionSuggestThreshhold;

		$items = $this->mColl['items'] ?? [];
		if ( !is_array( $items ) ) {
			return;
		}
		$numItems = count( $items );
		if ( $numItems === 0 || $numItems > $wgCollectionSuggestThreshhold ) {
			return;
		}

		foreach ( $items as $item ) {
			if ( $item['type'] != 'article' ) {
				continue;
			}
			$articleName = $item['title'];
			if ( $this->searchEntry( $articleName, $this->mLinkList ) !== false ) {
				// the links of this article are already known
				continue;
			}

			// makeTitleSafe() yields null for names that are not valid
			// titles; such items cannot be loaded, so they are skipped
			$title = Title::makeTitleSafe( NS_MAIN, $articleName );
			if ( $title === null ) {
				continue;
			}

			$article = new Article( $title, $item['revision'] );
			$content = $article->getPage()->getContent();
			// only text content can be scanned for links
			$wikitext = ( $content instanceof TextContent )
				? $content->getText()
				: '';

			$this->mLinkList[] = [
				'name' => $articleName,
				'links' => $this->getWeightedLinks( $numItems, $wikitext ),
			];
		}
	}

	/**
	 * Delete items from $mLinkList that are not in the collection any more
	 */
	private function deleteUnusedArticles(): void {
		$this->mLinkList = array_values( array_filter(
			$this->mLinkList,
			static function ( $item ) {
				return Session::findArticle( $item['name'] ) != -1;
			}
		) );
	}

	/**
	 * Follow a redirect to its target
	 *
	 * @param Title $title
	 * @return Title|null $title itself if it is no redirect, otherwise the
	 *  redirect target, or null if the target cannot be determined
	 */
	private function resolveRedirects( $title ): ?Title {
		if ( !$title->isRedirect() ) {
			return $title;
		}

		$article = new Article( $title, 0 );
		$content = $article->getPage()->getContent();
		// the content may be unavailable; callers treat null as
		// "skip this link"
		return $content ? $content->getRedirectTarget() : null;
	}

	/**
	 * Collect the existing articles a wikitext links to
	 *
	 * Links containing ':' or '#' are ignored. Redirects are replaced
	 * by their target.
	 *
	 * @param string $wikitext article text
	 * @return array[] maps every alias, i.e. the article name itself and
	 *  every link target or display text it was linked with, to the set
	 *  of article names ( name => true ) it stands for
	 */
	private function extractLinkMap( string $wikitext ): array {
		$allLinks = [];
		preg_match_all(
			'/\[\[(.+?)\]\]/',
			$wikitext,
			$allLinks,
			PREG_SET_ORDER
		);

		$linkmap = [];
		foreach ( $allLinks as $match ) {
			$link = $match[1];

			// skip links with ':' and '#'
			if ( preg_match( '/[:#]/', $link ) ) {
				continue;
			}

			// handle links with a displaytitle
			if ( preg_match( '/(.+?)\|(.+)/', $link, $matches ) ) {
				[ , $link, $alias ] = $matches;
			} else {
				$alias = $link;
			}

			// check & normalize title
			$title = Title::makeTitleSafe( NS_MAIN, $link );
			if ( $title === null || !$title->exists() ) {
				continue;
			}
			$resolved = $this->resolveRedirects( $title );
			if ( !$resolved ) {
				continue;
			}
			$link = $resolved->getText();

			$linkmap[$link][$link] = true;
			// strict comparison: "1e3" and "1000" are different aliases
			if ( $link !== $alias ) {
				$linkmap[$alias][$link] = true;
			}
		}

		return $linkmap;
	}

	/**
	 * Extract & count links from wikitext
	 *
	 * If $num_articles is below $wgCollectionSuggestCheapWeightThreshhold,
	 * the occurrences of every alias in the text are counted and turned
	 * into a weight between 1 and 1.5. Otherwise every link simply gets
	 * the weight 1.
	 *
	 * @param int $num_articles number of items in the collection
	 * @param string|null $wikitext article text
	 * @return float[] with links and their weights
	 */
	private function getWeightedLinks( $num_articles, $wikitext ): array {
		global $wgCollectionSuggestCheapWeightThreshhold;

		$wikitext = (string)$wikitext;
		if ( $wikitext === '' ) {
			// no text, no links
			return [];
		}

		$linkmap = $this->extractLinkMap( $wikitext );

		$linkcount = [];
		if ( $num_articles >= $wgCollectionSuggestCheapWeightThreshhold ) {
			// cheaper algorithm: just count links
			foreach ( $linkmap as $linked ) {
				foreach ( $linked as $link => $dummy ) {
					$linkcount[$link] = 1;
				}
			}

			return $linkcount;
		}

		// more expensive algorithm: count words
		foreach ( $linkmap as $alias => $linked ) {
			// array keys that look like integers are integers in PHP,
			// hence the cast
			$num = (int)preg_match_all(
				'/\W' . preg_quote( (string)$alias, '/' ) . '\W/i',
				$wikitext
			);

			foreach ( $linked as $link => $dummy ) {
				$linkcount[$link] = ( $linkcount[$link] ?? 0 ) + $num;
			}
		}

		if ( count( $linkcount ) == 0 ) {
			return [];
		}

		// normalize: the most frequent link gets 1.5, a link that
		// occurs once gets 1
		$norm = log( max( 1, max( $linkcount ) ) );
		$result = [];
		foreach ( $linkcount as $link => $count ) {
			// A count of 0 is possible if neither the resolved name nor
			// any alias is found as a word; log( 0 ) would yield -INF,
			// so such links get the minimum weight instead.
			$result[$link] = ( $norm > 0 && $count > 0 )
				? 1 + 0.5 * log( $count ) / $norm
				: 1;
		}

		return $result;
	}

	/**
	 * Calculate the $mPropList from $mLinkList and $mBanList
	 *
	 * The weights an article gets from different book articles are added
	 * up. The result is sorted by descending weight, then by name. If
	 * there are proposals with a weight above 1, proposals with a weight
	 * of 1 or less are dropped.
	 */
	private function getPropList(): void {
		$prop = [];
		// article name => position in $prop
		$position = [];
		foreach ( $this->mLinkList as $article ) {
			foreach ( $article['links'] as $linkName => $val ) {
				// array keys that look like integers are integers in PHP
				$linkName = (string)$linkName;
				if ( !$this->checkLink( $linkName ) ) {
					continue;
				}
				if ( isset( $position[$linkName] ) ) {
					$prop[$position[$linkName]]['val'] += $val;
				} else {
					$position[$linkName] = count( $prop );
					$prop[] = [
						'name' => $linkName,
						'val' => $val,
					];
				}
			}
		}

		usort(
			$prop,
			static function ( $a, $b ) {
				// highest weight first, equal weights ordered by name
				$byWeight = $b['val'] <=> $a['val'];
				return $byWeight !== 0
					? $byWeight
					: strcmp( $a['name'], $b['name'] );
			}
		);

		$this->mPropList = [];
		$have_real_weights = false;
		foreach ( $prop as $p ) {
			if ( $p['val'] > 1 ) {
				$have_real_weights = true;
			} elseif ( $have_real_weights ) {
				// $prop is sorted, so everything from here on has a
				// weight of 1 or less
				break;
			}
			$this->mPropList[] = $p;
		}
	}

	/**
	 * Search an article in an array and return its key or false
	 * if the array doesn't contain the article
	 *
	 * Names are compared as strings, so names that merely look like
	 * equal numbers ("1e3" and "1000") are not confused.
	 *
	 * @param string $entry an articlename
	 * @param array[] $array to be searched, it has to be 2-dimensional,
	 *  the 2nd dimension needs the key 'name'
	 * @return int|string|false the key of the first match or false
	 */
	private function searchEntry( $entry, $array ) {
		$entry = (string)$entry;
		foreach ( $array as $key => $item ) {
			if ( isset( $item['name'] ) && (string)$item['name'] === $entry ) {
				return $key;
			}
		}
		return false;
	}

	/**
	 * Check if an article is banned or belongs to the book/collection
	 *
	 * @param string $link an articlename
	 * @return bool true: if the article can be added to the proposals
	 *              false: if the article can't be added to the proposals
	 */
	private function checkLink( $link ): bool {
		$link = (string)$link;

		// a collection without 'items' has no members
		foreach ( $this->mColl['items'] ?? [] as $item ) {
			if ( $item['type'] == 'article'
				&& isset( $item['title'] )
				&& (string)$item['title'] === $link
			) {
				return false;
			}
		}

		return !in_array( $link, $this->mBanList, true );
	}

	/**
	 * @return int number of proposals calculated by the last getPropList()
	 */
	private function getPropCount(): int {
		return count( $this->mPropList );
	}
}