Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
85.04% |
108 / 127 |
|
75.00% |
6 / 8 |
CRAP | |
0.00% |
0 / 1 |
SearchTaskSuggester | |
85.04% |
108 / 127 |
|
75.00% |
6 / 8 |
36.65 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
3 | |||
suggest | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
filter | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
2 | |||
doSuggest | |
80.00% |
56 / 70 |
|
0.00% |
0 / 1 |
13.15 | |||
search | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
mapTopicData | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
4 | |||
setDebugData | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
deduplicateSuggestions | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
4 | |||
compareTasks | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
4 |
1 | <?php |
2 | |
3 | namespace GrowthExperiments\NewcomerTasks\TaskSuggester; |
4 | |
5 | use GrowthExperiments\NewcomerTasks\NewcomerTasksUserOptionsLookup; |
6 | use GrowthExperiments\NewcomerTasks\Task\Task; |
7 | use GrowthExperiments\NewcomerTasks\Task\TaskSet; |
8 | use GrowthExperiments\NewcomerTasks\Task\TaskSetFilters; |
9 | use GrowthExperiments\NewcomerTasks\TaskSuggester\SearchStrategy\SearchQuery; |
10 | use GrowthExperiments\NewcomerTasks\TaskSuggester\SearchStrategy\SearchStrategy; |
11 | use GrowthExperiments\NewcomerTasks\TaskType\TaskType; |
12 | use GrowthExperiments\NewcomerTasks\TaskType\TaskTypeHandlerRegistry; |
13 | use GrowthExperiments\NewcomerTasks\Topic\Topic; |
14 | use GrowthExperiments\Util; |
15 | use ISearchResultSet; |
16 | use MediaWiki\Cache\LinkBatchFactory; |
17 | use MediaWiki\Message\Message; |
18 | use MediaWiki\Status\Status; |
19 | use MediaWiki\User\UserIdentity; |
20 | use MultipleIterator; |
21 | use Psr\Log\LoggerAwareInterface; |
22 | use Psr\Log\LoggerAwareTrait; |
23 | use Psr\Log\NullLogger; |
24 | use SearchResult; |
25 | use StatusValue; |
26 | |
/**
 * Shared functionality for local and remote search.
 *
 * Subclasses supply the search backend by implementing search(). This class asks the
 * SearchStrategy for the queries to run, interleaves the results of those queries,
 * converts each result into a Task via the matching task type handler, and returns the
 * deduplicated tasks as a TaskSet.
 */
abstract class SearchTaskSuggester implements TaskSuggester, LoggerAwareInterface {

	use LoggerAwareTrait;

	// Keep this in sync with GrowthTasksApi.js#fetchTasks
	public const DEFAULT_LIMIT = 15;

	private TaskTypeHandlerRegistry $taskTypeHandlerRegistry;

	/** @var SearchStrategy */
	protected $searchStrategy;

	private NewcomerTasksUserOptionsLookup $newcomerTasksUserOptionsLookup;

	private LinkBatchFactory $linkBatchFactory;

	/** @var TaskType[] id => TaskType */
	protected $taskTypes = [];

	/** @var Topic[] id => Topic */
	protected $topics = [];

	/**
	 * @param TaskTypeHandlerRegistry $taskTypeHandlerRegistry
	 * @param SearchStrategy $searchStrategy
	 * @param NewcomerTasksUserOptionsLookup $newcomerTasksUserOptionsLookup
	 * @param LinkBatchFactory $linkBatchFactory
	 * @param TaskType[] $taskTypes Re-indexed by task type ID; the given order is kept and
	 *   is later used as the precedence order in compareTasks().
	 * @param Topic[] $topics Re-indexed by topic ID; the given order is kept and is later
	 *   used as the precedence order in compareTasks().
	 */
	public function __construct(
		TaskTypeHandlerRegistry $taskTypeHandlerRegistry,
		SearchStrategy $searchStrategy,
		NewcomerTasksUserOptionsLookup $newcomerTasksUserOptionsLookup,
		LinkBatchFactory $linkBatchFactory,
		array $taskTypes,
		array $topics
	) {
		$this->taskTypeHandlerRegistry = $taskTypeHandlerRegistry;
		$this->searchStrategy = $searchStrategy;
		$this->newcomerTasksUserOptionsLookup = $newcomerTasksUserOptionsLookup;
		$this->linkBatchFactory = $linkBatchFactory;
		foreach ( $taskTypes as $taskType ) {
			$this->taskTypes[$taskType->getId()] = $taskType;
		}
		foreach ( $topics as $topic ) {
			$this->topics[$topic->getId()] = $topic;
		}
		// Default to a no-op logger; callers can replace it via setLogger().
		$this->logger = new NullLogger();
	}

	/** @inheritDoc */
	public function suggest(
		UserIdentity $user,
		TaskSetFilters $taskSetFilters,
		?int $limit = null,
		?int $offset = null,
		array $options = []
	) {
		// No page ID restriction: search the whole index.
		return $this->doSuggest( null, $user, $taskSetFilters, $limit, $offset, $options );
	}

	/** @inheritDoc */
	public function filter( UserIdentity $user, TaskSet $taskSet ) {
		$taskTypes = $taskSet->getFilters()->getTaskTypeFilters();

		$pageTitles = array_map( static function ( Task $task ) {
			return $task->getTitle();
		}, iterator_to_array( $taskSet ) );
		$linkBatch = $this->linkBatchFactory->newLinkBatch( $pageTitles );
		// The result is passed on as-is. NOTE(review): entries are deliberately not pruned
		// here, since an emptied list might be treated by the search strategy as
		// "no restriction" - confirm against SearchStrategy::getQueries() before changing.
		$pageIds = array_values( $linkBatch->execute() );

		// Topic filtering is slow and topic changes don't really invalidate tasks, so just copy
		// topic data from the old taskset instead.
		$taskSetFilters = new TaskSetFilters( $taskTypes, [] );
		$filteredTaskSet = $this->doSuggest( $pageIds, $user, $taskSetFilters, $taskSet->count() );
		if ( !$filteredTaskSet instanceof TaskSet ) {
			// Search failed; pass the error status through unchanged.
			return $filteredTaskSet;
		}
		$filteredTasks = iterator_to_array( $filteredTaskSet );
		$this->mapTopicData( $taskSet, $filteredTasks );

		// Reduce the total count by the number of tasks that did not survive re-validation.
		$subtracted = $taskSet->count() - $filteredTaskSet->count();
		$finalTaskSet = new TaskSet( $filteredTasks, $taskSet->getTotalCount() - $subtracted,
			$taskSet->getOffset(), $taskSet->getFilters(), $taskSet->getInvalidTasks() );
		$finalTaskSet->setDebugData( $taskSet->getDebugData() );
		return $finalTaskSet;
	}

	/**
	 * See suggest() for details. The only difference is that $pageIds can be used to restrict
	 * to a specific set of pages.
	 *
	 * Note that when $taskSetFilters has no task type filters, this method sets them on the
	 * passed object (see below), so the caller's object is modified.
	 *
	 * @param array|null $pageIds List of page IDs to limit suggestions to.
	 * @param UserIdentity $user
	 * @param TaskSetFilters $taskSetFilters
	 * @param int|null $limit Maximum number of tasks to collect; null means DEFAULT_LIMIT.
	 *   A limit of zero or less yields no tasks.
	 * @param int|null $offset Currently ignored, see the FIXME in the method body.
	 * @param array $options Same as in suggest().
	 * @return TaskSet|StatusValue A fatal StatusValue when none of the requested task types
	 *   is known, or the error returned by search() when a query fails.
	 */
	private function doSuggest(
		?array $pageIds,
		UserIdentity $user,
		TaskSetFilters $taskSetFilters,
		?int $limit = null,
		?int $offset = null,
		array $options = []
	) {
		$debug = $options['debug'] ?? false;

		// We generally don't try to handle task type filtering for the A/B test (T278123) here
		// as it is already handled in NewcomerTasksUserOptionsLookup, but we make an exception
		// for the case when $taskTypeFilter === [] which would be difficult to handle elsewhere.
		if ( !$taskSetFilters->getTaskTypeFilters() ) {
			$taskSetFilters->setTaskTypeFilters(
				$this->newcomerTasksUserOptionsLookup
					->filterTaskTypes( array_keys( $this->taskTypes ), $user )
			);
		}

		// FIXME these and task types should have similar validation rules
		// Unknown topic IDs are silently dropped; known ones keep their configured order.
		$topics = array_values( array_intersect_key(
			$this->topics,
			array_flip( $taskSetFilters->getTopicFilters() )
		) );

		$limit ??= self::DEFAULT_LIMIT;
		// FIXME we are completely ignoring offset for now because 1) doing offsets when we are
		// interleaving search results from multiple sources is hard, and 2) we are randomizing
		// search results so offsets would not really be meaningful anyway.
		$offset = 0;

		// Split the requested task type IDs into known task types and unknown IDs.
		$taskTypes = $invalidTaskTypes = [];
		foreach ( $taskSetFilters->getTaskTypeFilters() as $taskTypeId ) {
			$taskType = $this->taskTypes[$taskTypeId] ?? null;
			if ( $taskType instanceof TaskType ) {
				$taskTypes[] = $taskType;
			} else {
				$invalidTaskTypes[] = $taskTypeId;
			}
		}

		if ( !$taskTypes ) {
			return StatusValue::newFatal(
				wfMessage( 'growthexperiments-newcomertasks-invalid-tasktype',
					Message::listParam( $invalidTaskTypes, 'comma' )
				)
			);
		}

		$queries = $this->searchStrategy->getQueries(
			$taskTypes,
			$topics,
			$pageIds,
			$options['excludePageIds'] ?? null,
			$taskSetFilters->getTopicFiltersMode()
		);

		// MIT_NEED_ANY keeps iterating until every attached result set is exhausted (exhausted
		// ones yield null); MIT_KEYS_ASSOC keys each slice by the query ID given on attach.
		$matchIterator = new MultipleIterator( MultipleIterator::MIT_NEED_ANY |
			MultipleIterator::MIT_KEYS_ASSOC );
		$totalCount = 0;
		foreach ( $queries as $query ) {
			$matches = $this->search( $query, $limit, $offset, $debug );
			if ( $matches instanceof StatusValue ) {
				// Only log when there's a logger; Status::getWikiText would break unit tests.
				if ( !$this->logger instanceof NullLogger ) {
					$this->logger->warning( 'Search error: {message}', [
						'message' => Status::wrap( $matches )->getWikiText( false, false, 'en' ),
						'searchTerm' => $query->getQueryString(),
						'queryId' => $query->getId(),
						'limit' => $limit,
						'offset' => $offset,
					] );
				}
				// A single failed query aborts the whole suggestion run.
				return $matches;
			}
			$totalCount += $matches->getTotalHits();
			$matchIterator->attachIterator( Util::getIteratorFromTraversable( $matches ), $query->getId() );
		}

		// The limit is applied before deduplication, so fewer than $limit tasks may remain.
		$suggestions = $this->deduplicateSuggestions(
			$this->collectSuggestions( $matchIterator, $queries, $limit )
		);

		$taskSet = new TaskSet(
			$suggestions,
			$totalCount,
			$offset,
			$taskSetFilters
		);

		if ( $debug ) {
			$this->setDebugData( $taskSet, $queries );
		}
		return $taskSet;
	}

	/**
	 * Turn interleaved search results into tasks, taking one result per query in turn until
	 * $limit tasks have been collected or all result sets are exhausted.
	 *
	 * @param MultipleIterator $matchIterator Search results, keyed by query ID.
	 * @param SearchQuery[] $queries Queries indexed by the same IDs the results were
	 *   attached with.
	 * @param int $limit Maximum number of tasks to return; zero or less returns no tasks.
	 * @return Task[]
	 */
	private function collectSuggestions(
		MultipleIterator $matchIterator,
		array $queries,
		int $limit
	): array {
		$suggestions = [];
		// The limit check in the loop runs after a task has been appended, so a non-positive
		// limit (e.g. filter() called with an empty TaskSet) must be handled up front;
		// otherwise one task would be returned regardless.
		if ( $limit <= 0 ) {
			return $suggestions;
		}

		foreach ( $matchIterator as $matchSlice ) {
			// array_filter() drops the null placeholders of already exhausted result sets.
			/** @var SearchResult $match */
			foreach ( array_filter( $matchSlice ) as $queryId => $match ) {
				// TODO: Filter out pages that are protected.
				$query = $queries[$queryId];
				$suggestions[] = $this->taskTypeHandlerRegistry
					->getByTaskType( $query->getTaskType() )
					->createTaskFromSearchResult( $query, $match );
				if ( count( $suggestions ) >= $limit ) {
					return $suggestions;
				}
			}
		}
		return $suggestions;
	}

	/**
	 * Run a single search query against the backend.
	 *
	 * @param SearchQuery $query
	 * @param int $limit
	 * @param int $offset
	 * @param bool $debug Store debug data so it can be set in setDebugData()
	 * @return ISearchResultSet|StatusValue Search results, or StatusValue on error.
	 */
	abstract protected function search(
		SearchQuery $query,
		int $limit,
		int $offset,
		bool $debug
	);

	/**
	 * Build the "namespace:dbkey" string used to recognize tasks that refer to the same page.
	 *
	 * @param Task $task
	 * @return string
	 */
	private function getTitleKey( Task $task ): string {
		$title = $task->getTitle();
		return $title->getNamespace() . ':' . $title->getDBkey();
	}

	/**
	 * Copy topic data from the tasks in $sourceTaskSet to the tasks in $targetTasks.
	 * Tasks are matched by page; target tasks without a matching source task are left as is.
	 *
	 * @param TaskSet $sourceTaskSet
	 * @param Task[] $targetTasks
	 */
	private function mapTopicData( TaskSet $sourceTaskSet, array $targetTasks ): void {
		$sourceTasksByTitle = [];
		foreach ( $sourceTaskSet as $sourceTask ) {
			$sourceTasksByTitle[$this->getTitleKey( $sourceTask )] = $sourceTask;
		}
		foreach ( $targetTasks as $targetTask ) {
			$sourceTask = $sourceTasksByTitle[$this->getTitleKey( $targetTask )] ?? null;
			if ( $sourceTask ) {
				$targetTask->setTopics( $sourceTask->getTopics() );
			}
		}
	}

	/**
	 * Set extra debug data. Only called in debug mode.
	 * Collects the non-empty debug URLs of the queries under the 'searchDebugUrls' key.
	 *
	 * @param TaskSet $taskSet
	 * @param SearchQuery[] $queries
	 * @return void
	 */
	private function setDebugData( TaskSet $taskSet, array $queries ): void {
		$debugUrls = [];
		foreach ( $queries as $query ) {
			$debugUrl = $query->getDebugUrl();
			if ( $debugUrl ) {
				$debugUrls[] = $debugUrl;
			}
		}
		$taskSet->setDebugData( [ 'searchDebugUrls' => $debugUrls ] );
	}

	/**
	 * Make sure there's only one task per article, even if an article is multiple task types / topics.
	 * When several tasks refer to the same page, the one preferred by compareTasks() is kept;
	 * on a tie the task seen first wins.
	 *
	 * @param Task[] $suggestions
	 * @return Task[]
	 */
	private function deduplicateSuggestions( array $suggestions ): array {
		/** @var Task[] $deduped */
		$deduped = [];
		foreach ( $suggestions as $suggestion ) {
			$key = $this->getTitleKey( $suggestion );
			if ( !isset( $deduped[$key] ) || $this->compareTasks( $suggestion, $deduped[$key] ) < 0 ) {
				$deduped[$key] = $suggestion;
			}
		}
		return array_values( $deduped );
	}

	/**
	 * Compare two tasks for sorting. Return an integer, like strcmp & co.
	 * Task types that come first in the configured task type list take precedence. Otherwise,
	 * it's topics that come first.
	 *
	 * NOTE(review): array_search() returns false for an ID that is not configured, which the
	 * subtraction below treats as position 0. That is presumably not expected to happen -
	 * confirm before relying on the ordering of unknown IDs.
	 *
	 * @param Task $first
	 * @param Task $second
	 * @return int Negative if $first takes precedence, positive if $second does, 0 on a tie.
	 */
	private function compareTasks( Task $first, Task $second ): int {
		$taskTypeIds = array_keys( $this->taskTypes );
		$topicIds = array_keys( $this->topics );

		$taskTypePosFirst = array_search( $first->getTaskType()->getId(), $taskTypeIds, true );
		$taskTypePosSecond = array_search( $second->getTaskType()->getId(), $taskTypeIds, true );
		// There should be at most one topic (otherwise we won't need the compare logic).
		// No topic precedes any topic (although that comparison should never happen).
		$topicPosFirst = $first->getTopics()
			? array_search( $first->getTopics()[0]->getId(), $topicIds, true )
			: -9999;
		$topicPosSecond = $second->getTopics()
			? array_search( $second->getTopics()[0]->getId(), $topicIds, true )
			: -9999;
		// Topic position only matters when both tasks have the same task type position.
		return ( $taskTypePosFirst - $taskTypePosSecond ) ?: ( $topicPosFirst - $topicPosSecond );
	}

}