Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
82.44% covered (warning)
82.44%
108 / 131
25.00% covered (danger)
25.00%
2 / 8
CRAP
0.00% covered (danger)
0.00%
0 / 1
QueryBuildDocument
82.44% covered (warning)
82.44%
108 / 131
25.00% covered (danger)
25.00%
2 / 8
33.55
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 execute
81.25% covered (warning)
81.25%
13 / 16
0.00% covered (danger)
0.00%
0 / 1
3.06
 doExecute
87.34% covered (warning)
87.34%
69 / 79
0.00% covered (danger)
0.00%
0 / 1
16.52
 getRevisionIDs
90.91% covered (success)
90.91%
10 / 11
0.00% covered (danger)
0.00%
0 / 1
5.02
 getAllowedParams
100.00% covered (success)
100.00%
15 / 15
100.00% covered (success)
100.00%
1 / 1
1
 isInternal
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 getExamplesMessages
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
2
 markUnrenderable
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2
3namespace CirrusSearch\Api;
4
5use CirrusSearch\BuildDocument\BuildDocument;
6use CirrusSearch\BuildDocument\DocumentSizeLimiter;
7use CirrusSearch\CirrusSearch;
8use CirrusSearch\Profile\SearchProfileService;
9use CirrusSearch\Search\CirrusIndexField;
10use CirrusSearch\SearchConfig;
11use MediaWiki\Api\ApiBase;
12use MediaWiki\Api\ApiQuery;
13use MediaWiki\Api\ApiQueryBase;
14use MediaWiki\Api\ApiResult;
15use MediaWiki\MediaWikiServices;
16use MediaWiki\PoolCounter\PoolCounterWorkViaCallback;
17use MediaWiki\Revision\SlotRecord;
18use Wikimedia\ParamValidator\ParamValidator;
19
20/**
21 * Generate CirrusSearch document for page.
22 *
23 * @license GPL-2.0-or-later
24 */
class QueryBuildDocument extends ApiQueryBase {
    use ApiTrait;

    /**
     * @param ApiQuery $query Query module this prop module is attached to
     * @param string $moduleName Name this module is registered under
     */
    public function __construct( ApiQuery $query, string $moduleName ) {
        // 'cb' is passed to the parent as the third constructor argument
        // (the parameter prefix of ApiQueryBase).
        parent::__construct( $query, $moduleName, 'cb' );
    }

    /**
     * Entry point: build the requested documents, limiting concurrency
     * through a pool counter unless the request comes from the internal
     * streaming updater user.
     *
     * @throws \RuntimeException When the search engine is not CirrusSearch
     */
    public function execute() {
        $engine = MediaWikiServices::getInstance()->getSearchEngineFactory()->create();
        if ( !( $engine instanceof CirrusSearch ) ) {
            throw new \RuntimeException( 'Could not create cirrus engine' );
        }

        $internalUsername = $engine->getConfig()->get( 'CirrusSearchStreamingUpdaterUsername' );
        if ( $this->getUser()->getName() === $internalUsername ) {
            // Bypass poolcounter protection for the internal cirrus user
            $this->doExecute( $engine );
            return;
        }

        // Protect against too many concurrent requests.
        // A single global key is used: this API is internal and is only useful
        // for manual debugging purposes, so there is no real need to limit it
        // on a per-user basis.
        $worker = new PoolCounterWorkViaCallback(
            'CirrusSearch-QueryBuildDocument',
            'QueryBuildDocument',
            [
                'doWork' => function () use ( $engine ) {
                    return $this->doExecute( $engine );
                },
                'error' => function () {
                    $this->dieWithError( 'apierror-concurrency-limit' );
                },
            ]
        );
        $worker->execute();
    }

    /**
     * Build the CirrusSearch document of every requested page or revision
     * and write it, together with its metadata, to the API result.
     *
     * For each page the result may receive:
     *  - 'unrenderable' when the page or revision is a redirect,
     *  - 'texthidden' when the revision text is not public,
     *  - 'cirrusbuilddoc', 'cirrusbuilddoc_metadata' and
     *    'cirrusbuilddoc_comment' when the document was built.
     * Revisions the revision store cannot find are reported under
     * 'query' => 'badrevids'.
     *
     * @param CirrusSearch $engine Engine providing the search configuration
     */
    public function doExecute( CirrusSearch $engine ) {
        $result = $this->getResult();
        $services = MediaWikiServices::getInstance();

        $builders = $this->getParameter( 'builders' );
        $profile = $this->getParameter( 'limiterprofile' );

        // Every builder that was not requested is turned into a skip flag.
        $flags = 0;
        if ( !in_array( 'content', $builders, true ) ) {
            $flags |= BuildDocument::SKIP_PARSE;
        }
        if ( !in_array( 'links', $builders, true ) ) {
            $flags |= BuildDocument::SKIP_LINKS;
        }

        $wikiPageFactory = $services->getWikiPageFactory();
        $revisionStore = $services->getRevisionStore();

        // Maps page id to either a revision (revision based request)
        // or a wiki page (title/page id based request).
        $pages = [];
        $revisionBased = false;
        if ( $this->getPageSet()->getRevisionIDs() ) {
            $revisionBased = true;
            foreach ( $this->getRevisionIDs() as $pageId => $revId ) {
                $rev = $revisionStore->getRevisionById( $revId );
                if ( $rev === null ) {
                    // We cannot trust ApiPageSet to properly identify missing revisions, RevisionStore
                    // might not agree with it likely because they could be using different db replicas (T370770)
                    $result->addValue( 'query', 'badrevids', [
                        $revId => [ 'revid' => $revId, 'missing' => true ]
                    ] );
                } elseif ( $rev->audienceCan( $rev::DELETED_TEXT, $rev::FOR_PUBLIC ) ) {
                    // Redirects are not directly represented as searchable documents.
                    // They are unrenderable.
                    if ( $rev->getContent( SlotRecord::MAIN )->isRedirect() ) {
                        $this->markUnrenderable( $result, $pageId );
                    } else {
                        $pages[$pageId] = $rev;
                    }
                } else {
                    // While the user might have permissions, we want to limit
                    // what could possibly be indexed to that which is public.
                    // For an anon this would fail deeper in the system
                    // anyways, this early check mostly avoids blowing up deep
                    // in the bowels.
                    $result->addValue(
                        [ 'query', 'pages', $pageId ],
                        'texthidden', true
                    );
                }
            }
        } else {
            foreach ( $this->getPageSet()->getGoodPages() as $pageId => $title ) {
                $page = $wikiPageFactory->newFromTitle( $title );
                if ( $page->isRedirect() ) {
                    $this->markUnrenderable( $result, $pageId );
                } else {
                    $pages[$pageId] = $page;
                }
            }
        }

        $searchConfig = $engine->getConfig();
        $connection = $this->getCirrusConnection();
        $sizeLimiterProfile = $searchConfig->getProfileService()->loadProfile(
            SearchProfileService::DOCUMENT_SIZE_LIMITER,
            SearchProfileService::CONTEXT_DEFAULT,
            $profile
        );
        $builder = new BuildDocument(
            $connection,
            $this->getDB(),
            $revisionStore,
            $services->getBacklinkCacheFactory(),
            new DocumentSizeLimiter( $sizeLimiterProfile ),
            $services->getTitleFormatter(),
            $wikiPageFactory,
            $services->getTitleFactory()
        );

        // Metadata shared by every document of this request.
        $baseMetadata = [];
        $clusterGroup = $searchConfig->getClusterAssignment()->getCrossClusterName();
        if ( $clusterGroup !== null ) {
            $baseMetadata['cluster_group'] = $clusterGroup;
        }
        $indexBaseName = $searchConfig->get( SearchConfig::INDEX_BASE_NAME );

        foreach ( $builder->initialize( $pages, $flags ) as $doc ) {
            $pageId = $doc->get( 'page_id' );
            $revision = $revisionBased ? $pages[$pageId] : null;
            if ( !$builder->finalize( $doc, false, $revision ) ) {
                continue;
            }

            $pagePath = [ 'query', 'pages', $pageId ];
            $result->addValue( $pagePath, 'cirrusbuilddoc', $doc->getData() );

            // Always start from the shared metadata so that 'cluster_group'
            // is reported even when the document carries no noop hints.
            $metadata = $baseMetadata;
            $hints = CirrusIndexField::getHint( $doc, CirrusIndexField::NOOP_HINT );
            if ( $hints !== null ) {
                $metadata['noop_hints'] = $hints;
            }
            $limiterStats = CirrusIndexField::getHint( $doc, DocumentSizeLimiter::HINT_DOC_SIZE_LIMITER_STATS );
            if ( $limiterStats !== null ) {
                $metadata['size_limiter_stats'] = $limiterStats;
            }
            $metadata['index_name'] = $connection->getIndexName(
                $indexBaseName,
                $connection->getIndexSuffixForNamespace( $doc->get( 'namespace' ) )
            );

            $result->addValue( $pagePath, 'cirrusbuilddoc_metadata', $metadata );
            $result->addValue(
                $pagePath,
                'cirrusbuilddoc_comment',
                'The CirrusDoc format is meant for internal use by CirrusSearch for debugging or queries, '
                . 'it might change at any time without notice'
            );
        }
    }

    /**
     * Reduce the requested revisions to a single revision per page.
     *
     * When several revisions of the same page are requested only the one
     * with the highest revision id is kept and a warning is added.
     *
     * @return int[] Revision id to build, keyed by page id
     */
    private function getRevisionIDs(): array {
        $revisionByPage = [];
        $ignoredSome = false;
        foreach ( $this->getPageSet()->getRevisionIDs() as $revId => $pageId ) {
            if ( isset( $revisionByPage[$pageId] ) ) {
                $ignoredSome = true;
                if ( $revisionByPage[$pageId] >= $revId ) {
                    // The revision already kept is at least as high as this one.
                    continue;
                }
            }
            $revisionByPage[$pageId] = $revId;
        }
        if ( $ignoredSome ) {
            $this->addWarning( [ 'apiwarn-cirrus-ignore-revisions' ] );
        }
        return $revisionByPage;
    }

    /** @inheritDoc */
    public function getAllowedParams() {
        return [
            // Which document builders to run; both run by default.
            'builders' => [
                ParamValidator::PARAM_DEFAULT => [ 'content', 'links' ],
                ParamValidator::PARAM_ISMULTI => true,
                ParamValidator::PARAM_ALLOW_DUPLICATES => false,
                ParamValidator::PARAM_TYPE => [
                    'content',
                    'links',
                ],
                ApiBase::PARAM_HELP_MSG => 'apihelp-query+cirrusbuilddoc-param-builders',
            ],
            // Name of the document size limiter profile to load.
            'limiterprofile' => [
                ParamValidator::PARAM_TYPE => 'string'
            ],
        ];
    }

    /**
     * Mark as internal. This isn't meant to be used by normal api users
     * @return bool
     */
    public function isInternal() {
        return true;
    }

    /**
     * @see ApiBase::getExamplesMessages
     * @return array
     */
    protected function getExamplesMessages() {
        return [
            'action=query&prop=cirrusbuilddoc&titles=Main_Page' =>
                'apihelp-query+cirrusbuilddoc-example'
        ];
    }

    /**
     * Flag a page as having no searchable document of its own.
     *
     * @param ApiResult $result Result object to write to
     * @param int $pageId The page to mark unrenderable
     */
    private function markUnrenderable( ApiResult $result, int $pageId ) {
        $result->addValue(
            [ 'query', 'pages', $pageId ],
            'unrenderable', true
        );
    }
}