Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
85.04% covered (warning)
85.04%
108 / 127
75.00% covered (warning)
75.00%
6 / 8
CRAP
0.00% covered (danger)
0.00%
0 / 1
SearchTaskSuggester
85.04% covered (warning)
85.04%
108 / 127
75.00% covered (warning)
75.00%
6 / 8
36.65
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
3
 suggest
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 filter
100.00% covered (success)
100.00%
17 / 17
100.00% covered (success)
100.00%
1 / 1
2
 doSuggest
80.00% covered (warning)
80.00%
56 / 70
0.00% covered (danger)
0.00%
0 / 1
13.15
 search
n/a
0 / 0
n/a
0 / 0
0
 mapTopicData
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
4
 setDebugData
0.00% covered (danger)
0.00%
0 / 5
0.00% covered (danger)
0.00%
0 / 1
12
 deduplicateSuggestions
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
4
 compareTasks
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
4
1<?php
2
3namespace GrowthExperiments\NewcomerTasks\TaskSuggester;
4
5use GrowthExperiments\NewcomerTasks\NewcomerTasksUserOptionsLookup;
6use GrowthExperiments\NewcomerTasks\Task\Task;
7use GrowthExperiments\NewcomerTasks\Task\TaskSet;
8use GrowthExperiments\NewcomerTasks\Task\TaskSetFilters;
9use GrowthExperiments\NewcomerTasks\TaskSuggester\SearchStrategy\SearchQuery;
10use GrowthExperiments\NewcomerTasks\TaskSuggester\SearchStrategy\SearchStrategy;
11use GrowthExperiments\NewcomerTasks\TaskType\TaskType;
12use GrowthExperiments\NewcomerTasks\TaskType\TaskTypeHandlerRegistry;
13use GrowthExperiments\NewcomerTasks\Topic\Topic;
14use GrowthExperiments\Util;
15use ISearchResultSet;
16use MediaWiki\Cache\LinkBatchFactory;
17use MediaWiki\Status\Status;
18use MediaWiki\User\UserIdentity;
19use Message;
20use MultipleIterator;
21use Psr\Log\LoggerAwareInterface;
22use Psr\Log\LoggerAwareTrait;
23use Psr\Log\NullLogger;
24use SearchResult;
25use StatusValue;
26
27/**
28 * Shared functionality for local and remote search.
29 */
30abstract class SearchTaskSuggester implements TaskSuggester, LoggerAwareInterface {
31
32    use LoggerAwareTrait;
33
34    // Keep this in sync with GrowthTasksApi.js#fetchTasks
35    public const DEFAULT_LIMIT = 15;
36
37    /** @var TaskTypeHandlerRegistry */
38    private $taskTypeHandlerRegistry;
39
40    /** @var SearchStrategy */
41    protected $searchStrategy;
42
43    /** @var NewcomerTasksUserOptionsLookup */
44    private $newcomerTasksUserOptionsLookup;
45
46    /** @var LinkBatchFactory */
47    private $linkBatchFactory;
48
49    /** @var TaskType[] id => TaskType */
50    protected $taskTypes = [];
51
52    /** @var Topic[] id => Topic */
53    protected $topics = [];
54
55    /**
56     * @param TaskTypeHandlerRegistry $taskTypeHandlerRegistry
57     * @param SearchStrategy $searchStrategy
58     * @param NewcomerTasksUserOptionsLookup $newcomerTasksUserOptionsLookup
59     * @param LinkBatchFactory $linkBatchFactory
60     * @param TaskType[] $taskTypes
61     * @param Topic[] $topics
62     */
63    public function __construct(
64        TaskTypeHandlerRegistry $taskTypeHandlerRegistry,
65        SearchStrategy $searchStrategy,
66        NewcomerTasksUserOptionsLookup $newcomerTasksUserOptionsLookup,
67        LinkBatchFactory $linkBatchFactory,
68        array $taskTypes,
69        array $topics
70    ) {
71        $this->taskTypeHandlerRegistry = $taskTypeHandlerRegistry;
72        $this->searchStrategy = $searchStrategy;
73        $this->newcomerTasksUserOptionsLookup = $newcomerTasksUserOptionsLookup;
74        $this->linkBatchFactory = $linkBatchFactory;
75        foreach ( $taskTypes as $taskType ) {
76            $this->taskTypes[$taskType->getId()] = $taskType;
77        }
78        foreach ( $topics as $topic ) {
79            $this->topics[$topic->getId()] = $topic;
80        }
81        $this->logger = new NullLogger();
82    }
83
84    /** @inheritDoc */
85    public function suggest(
86        UserIdentity $user,
87        TaskSetFilters $taskSetFilters,
88        ?int $limit = null,
89        ?int $offset = null,
90        array $options = []
91    ) {
92        return $this->doSuggest( null, $user, $taskSetFilters, $limit, $offset,
93            $options );
94    }
95
96    /** @inheritDoc */
97    public function filter( UserIdentity $user, TaskSet $taskSet ) {
98        $taskTypes = $taskSet->getFilters()->getTaskTypeFilters();
99
100        $pageTitles = array_map( static function ( Task $task ) {
101            return $task->getTitle();
102        }, iterator_to_array( $taskSet ) );
103        $linkBatch = $this->linkBatchFactory->newLinkBatch( $pageTitles );
104        $pageIds = array_values( $linkBatch->execute() );
105
106        // Topic filtering is slow and topic changes don't really invalidate tasks, so just copy
107        // topic data from the old taskset instead.
108        $taskSetFilters = new TaskSetFilters( $taskTypes, [] );
109        $filteredTaskSet = $this->doSuggest( $pageIds, $user, $taskSetFilters, $taskSet->count() );
110        if ( !$filteredTaskSet instanceof TaskSet ) {
111            return $filteredTaskSet;
112        }
113        $filteredTasks = iterator_to_array( $filteredTaskSet );
114        $this->mapTopicData( $taskSet, $filteredTasks );
115
116        $subtracted = $taskSet->count() - $filteredTaskSet->count();
117        $finalTaskSet = new TaskSet( $filteredTasks, $taskSet->getTotalCount() - $subtracted,
118            $taskSet->getOffset(), $taskSet->getFilters(), $taskSet->getInvalidTasks() );
119        $finalTaskSet->setDebugData( $taskSet->getDebugData() );
120        return $finalTaskSet;
121    }
122
123    /**
124     * See suggest() for details. The only difference is that $pageIds can be used to restrict
125     * to a specific set of pages.
126     * @param array|null $pageIds List of page IDs to limit suggestions to.
127     * @param UserIdentity $user
128     * @param TaskSetFilters $taskSetFilters
129     * @param int|null $limit
130     * @param int|null $offset
131     * @param array $options Same as in suggest().
132     * @return TaskSet|StatusValue
133     */
134    private function doSuggest(
135        ?array $pageIds,
136        UserIdentity $user,
137        TaskSetFilters $taskSetFilters,
138        ?int $limit = null,
139        ?int $offset = null,
140        array $options = []
141    ) {
142        $debug = $options['debug'] ?? false;
143
144        // We generally don't try to handle task type filtering for the A/B test (T278123) here
145        // as it is already handled in NewcomerTasksUserOptionsLookup, but we make an exception
146        // for the case when $taskTypeFilter === [] which would be difficult to handle elsewhere.
147        if ( !$taskSetFilters->getTaskTypeFilters() ) {
148            $taskSetFilters->setTaskTypeFilters(
149                $this->newcomerTasksUserOptionsLookup
150                    ->filterTaskTypes( array_keys( $this->taskTypes ), $user )
151            );
152        }
153
154        // FIXME these and task types should have similar validation rules
155        $topics = array_values( array_intersect_key(
156            $this->topics,
157            array_flip( $taskSetFilters->getTopicFilters() )
158        ) );
159
160        $limit ??= self::DEFAULT_LIMIT;
161        // FIXME we are completely ignoring offset for now because 1) doing offsets when we are
162        //   interleaving search results from multiple sources is hard, and 2) we are randomizing
163        //   search results so offsets would not really be meaningful anyway.
164        $offset = 0;
165        $totalCount = 0;
166        $matchIterator = new MultipleIterator( MultipleIterator::MIT_NEED_ANY |
167            MultipleIterator::MIT_KEYS_ASSOC );
168
169        $taskTypes = $invalidTaskTypes = [];
170        $taskTypeFilter = $taskSetFilters->getTaskTypeFilters();
171        foreach ( $taskTypeFilter as $taskTypeId ) {
172            $taskType = $this->taskTypes[$taskTypeId] ?? null;
173            if ( $taskType instanceof TaskType ) {
174                $taskTypes[] = $taskType;
175            } else {
176                $invalidTaskTypes[] = $taskTypeId;
177            }
178        }
179
180        if ( !$taskTypes ) {
181            return StatusValue::newFatal(
182                wfMessage( 'growthexperiments-newcomertasks-invalid-tasktype',
183                    Message::listParam( $invalidTaskTypes, 'comma' )
184                )
185            );
186        }
187
188        $queries = $this->searchStrategy->getQueries(
189            $taskTypes,
190            $topics,
191            $pageIds,
192            $options['excludePageIds'] ?? null,
193            $taskSetFilters->getTopicFiltersMode()
194        );
195        foreach ( $queries as $query ) {
196            $matches = $this->search( $query, $limit, $offset, $debug );
197            if ( $matches instanceof StatusValue ) {
198                // Only log when there's a logger; Status::getWikiText would break unit tests.
199                if ( !$this->logger instanceof NullLogger ) {
200                    $this->logger->warning( 'Search error: {message}', [
201                        'message' => Status::wrap( $matches )->getWikiText( false, false, 'en' ),
202                        'searchTerm' => $query->getQueryString(),
203                        'queryId' => $query->getId(),
204                        'limit' => $limit,
205                        'offset' => $offset,
206                    ] );
207                }
208                return $matches;
209            }
210            $totalCount += $matches->getTotalHits();
211            $matchIterator->attachIterator( Util::getIteratorFromTraversable( $matches ), $query->getId() );
212        }
213
214        $taskCount = 0;
215        $suggestions = [];
216        foreach ( $matchIterator as $matchSlice ) {
217            foreach ( array_filter( $matchSlice ) as $queryId => $match ) {
218                // TODO: Filter out pages that are protected.
219                /** @var $match SearchResult */
220                $query = $queries[$queryId];
221                $taskType = $query->getTaskType();
222                $suggestions[] = $this->taskTypeHandlerRegistry->getByTaskType( $taskType )
223                    ->createTaskFromSearchResult( $query, $match );
224                $taskCount++;
225                if ( $taskCount >= $limit ) {
226                    break 2;
227                }
228            }
229        }
230
231        $suggestions = $this->deduplicateSuggestions( $suggestions );
232
233        $taskSet = new TaskSet(
234            $suggestions,
235            $totalCount,
236            $offset,
237            $taskSetFilters
238        );
239
240        if ( $debug ) {
241            $this->setDebugData( $taskSet, $queries );
242        }
243        return $taskSet;
244    }
245
246    /**
247     * @param SearchQuery $query
248     * @param int $limit
249     * @param int $offset
250     * @param bool $debug Store debug data so it can be set in setDebugData()
251     * @return ISearchResultSet|StatusValue Search results, or StatusValue on error.
252     */
253    abstract protected function search(
254        SearchQuery $query,
255        int $limit,
256        int $offset,
257        bool $debug
258    );
259
260    /**
261     * Copy topic data from the tasks in $sourceTaskSet to the tasks in $targetTasks.
262     * @param TaskSet $sourceTaskSet
263     * @param Task[] $targetTasks
264     */
265    private function mapTopicData( TaskSet $sourceTaskSet, array $targetTasks ) {
266        $taskMap = [];
267        foreach ( $sourceTaskSet as $task ) {
268            $key = $task->getTitle()->getNamespace() . ':' . $task->getTitle()->getDBkey();
269            $taskMap[$key] = $task;
270        }
271
272        foreach ( $targetTasks as $task ) {
273            $key = $task->getTitle()->getNamespace() . ':' . $task->getTitle()->getDBkey();
274            $sourceTask = $taskMap[$key] ?? null;
275            if ( $sourceTask ) {
276                $task->setTopics( $sourceTask->getTopics(), $sourceTask->getTopicScores() );
277            }
278        }
279    }
280
281    /**
282     * Set extra debug data. Only called in debug mode.
283     * @param TaskSet $taskSet
284     * @param SearchQuery[] $queries
285     * @return void
286     */
287    private function setDebugData( TaskSet $taskSet, array $queries ): void {
288        $debugUrls = [];
289        foreach ( $queries as $query ) {
290            if ( $query->getDebugUrl() ) {
291                $debugUrls[] = $query->getDebugUrl();
292            }
293        }
294        $taskSet->setDebugData( [ 'searchDebugUrls' => $debugUrls ] );
295    }
296
297    /**
298     * Make sure there's only one task per article, even if an article is multiple task types / topics.
299     * @param Task[] $suggestions
300     * @return Task[]
301     */
302    private function deduplicateSuggestions( array $suggestions ) {
303        /** @var Task[] $deduped */
304        $deduped = [];
305        foreach ( $suggestions as $suggestion ) {
306            $key = $suggestion->getTitle()->getNamespace() . ':' . $suggestion->getTitle()->getDBkey();
307            if ( !isset( $deduped[$key] ) || $this->compareTasks( $suggestion, $deduped[$key] ) < 0 ) {
308                $deduped[$key] = $suggestion;
309            }
310        }
311        return array_values( $deduped );
312    }
313
314    /**
315     * Compare two tasks for sorting. Return an integer, like strcmp & co.
316     * Task types that come first in the configured task type list take precedence. Otherwise,
317     * it's topics that come first.
318     * @param Task $first
319     * @param Task $second
320     * @return int
321     */
322    private function compareTasks( Task $first, Task $second ): int {
323        $taskTypePosFirst = array_search( $first->getTaskType()->getId(),
324            array_keys( $this->taskTypes ), true );
325        $taskTypePosSecond = array_search( $second->getTaskType()->getId(),
326            array_keys( $this->taskTypes ), true );
327        // There should be at most one topic (otherwise we won't need the compare logic).
328        // No topic precedes any topic (although that comparison should never happen).
329        $topicPosFirst = $first->getTopics() ? array_search( $first->getTopics()[0]->getId(),
330            array_keys( $this->topics ), true ) : -9999;
331        $topicPosSecond = $second->getTopics() ? array_search( $second->getTopics()[0]->getId(),
332            array_keys( $this->topics ), true ) : -9999;
333        return ( $taskTypePosFirst - $taskTypePosSecond ) ?: ( $topicPosFirst - $topicPosSecond );
334    }
335
336}