Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
86.27% covered (warning)
86.27%
88 / 102
40.00% covered (danger)
40.00%
2 / 5
CRAP
0.00% covered (danger)
0.00%
0 / 1
QueryBuildDocument
86.27% covered (warning)
86.27%
88 / 102
40.00% covered (danger)
40.00%
2 / 5
24.37
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 execute
87.32% covered (warning)
87.32%
62 / 71
0.00% covered (danger)
0.00%
0 / 1
15.46
 getRevisionIDs
90.91% covered (success)
90.91%
10 / 11
0.00% covered (danger)
0.00%
0 / 1
5.02
 getAllowedParams
100.00% covered (success)
100.00%
15 / 15
100.00% covered (success)
100.00%
1 / 1
1
 getExamplesMessages
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2
3namespace CirrusSearch\Api;
4
5use CirrusSearch\BuildDocument\BuildDocument;
6use CirrusSearch\BuildDocument\DocumentSizeLimiter;
7use CirrusSearch\CirrusSearch;
8use CirrusSearch\Profile\SearchProfileService;
9use CirrusSearch\Search\CirrusIndexField;
10use CirrusSearch\SearchConfig;
11use MediaWiki\Api\ApiBase;
12use MediaWiki\Api\ApiQuery;
13use MediaWiki\Api\ApiQueryBase;
14use MediaWiki\MediaWikiServices;
15use Wikimedia\ParamValidator\ParamValidator;
16
17/**
18 * Generate CirrusSearch document for page.
19 *
20 * This program is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation; either version 2 of the License, or
23 * (at your option) any later version.
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * You should have received a copy of the GNU General Public License along
31 * with this program; if not, write to the Free Software Foundation, Inc.,
32 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
33 * http://www.gnu.org/copyleft/gpl.html
34 */
class QueryBuildDocument extends ApiQueryBase {
    use ApiTrait;

    /**
     * @param ApiQuery $query Query module this prop module is attached to
     * @param string $moduleName Name the module is registered under
     */
    public function __construct( ApiQuery $query, string $moduleName ) {
        // Third argument is the parameter prefix handed to ApiQueryBase.
        parent::__construct( $query, $moduleName, 'cb' );
    }

    /**
     * Build the CirrusSearch document of every requested page (or revision)
     * and add it, along with its indexing metadata, to the API result.
     *
     * For each document that BuildDocument::finalize() accepts, two values are
     * written under [ 'query', 'pages', <page id> ]:
     *  - 'cirrusbuilddoc': the document data
     *  - 'cirrusbuilddoc_metadata': cluster group (when one is configured),
     *    noop hints and size limiter stats (when present on the document) and
     *    the name of the index the document belongs to.
     *
     * @throws \RuntimeException when the search engine created by the factory
     *  is not a CirrusSearch instance
     */
    public function execute() {
        $result = $this->getResult();
        $services = MediaWikiServices::getInstance();
        $engine = $services->getSearchEngineFactory()->create();
        if ( !( $engine instanceof CirrusSearch ) ) {
            throw new \RuntimeException( 'Could not create cirrus engine' );
        }

        $builders = $this->getParameter( 'builders' );
        $profile = $this->getParameter( 'limiterprofile' );

        // Every builder that was not requested turns into a "skip" flag.
        $flags = 0;
        if ( !in_array( 'content', $builders, true ) ) {
            $flags |= BuildDocument::SKIP_PARSE;
        }
        if ( !in_array( 'links', $builders, true ) ) {
            $flags |= BuildDocument::SKIP_LINKS;
        }

        $pages = [];
        $wikiPageFactory = $services->getWikiPageFactory();
        $revisionStore = $services->getRevisionStore();
        $revisionBased = false;
        if ( $this->getPageSet()->getRevisionIDs() ) {
            // Revision mode: $pages maps page id => revision record.
            $revisionBased = true;
            foreach ( $this->getRevisionIDs() as $pageId => $revId ) {
                $rev = $revisionStore->getRevisionById( $revId );
                if ( $rev === null ) {
                    // We cannot trust ApiPageSet to properly identify missing revisions, RevisionStore
                    // might not agree with it likely because they could be using different db replicas (T370770)
                    $result->addValue( 'query', 'badrevids', [
                        $revId => [ 'revid' => $revId, 'missing' => true ]
                    ] );
                } elseif ( $rev->audienceCan( $rev::DELETED_TEXT, $rev::FOR_PUBLIC ) ) {
                    $pages[$pageId] = $rev;
                } else {
                    // While the user might have permissions, we want to limit
                    // what could possibly be indexed to that which is public.
                    // For an anon this would fail deeper in the system
                    // anyways, this early check mostly avoids blowing up deep
                    // in the bowels.
                    $result->addValue(
                        [ 'query', 'pages', $pageId ],
                        'texthidden', true
                    );
                }
            }
        } else {
            // Page mode: $pages maps page id => page object built from the title.
            foreach ( $this->getPageSet()->getGoodPages() as $pageId => $title ) {
                $pages[$pageId] = $wikiPageFactory->newFromTitle( $title );
            }
        }

        $searchConfig = $engine->getConfig();
        $connection = $this->getCirrusConnection();
        $sizeLimiter = new DocumentSizeLimiter(
            $searchConfig->getProfileService()->loadProfile(
                SearchProfileService::DOCUMENT_SIZE_LIMITER,
                SearchProfileService::CONTEXT_DEFAULT,
                $profile
            )
        );
        $builder = new BuildDocument(
            $connection,
            $this->getDB(),
            $revisionStore,
            $services->getBacklinkCacheFactory(),
            $sizeLimiter,
            $services->getTitleFormatter(),
            $wikiPageFactory,
            $services->getTitleFactory()
        );

        // Metadata shared by every document of this request.
        $baseMetadata = [];
        $clusterGroup = $searchConfig->getClusterAssignment()->getCrossClusterName();
        if ( $clusterGroup !== null ) {
            $baseMetadata['cluster_group'] = $clusterGroup;
        }
        $indexBaseName = $searchConfig->get( SearchConfig::INDEX_BASE_NAME );

        foreach ( $builder->initialize( $pages, $flags ) as $doc ) {
            // The page id is read from the document itself rather than from
            // the iteration key.
            $pageId = $doc->get( 'page_id' );
            $revision = $revisionBased ? $pages[$pageId] : null;
            if ( !$builder->finalize( $doc, false, $revision ) ) {
                continue;
            }

            $result->addValue(
                [ 'query', 'pages', $pageId ],
                'cirrusbuilddoc', $doc->getData()
            );

            // Always start from the shared metadata so that 'cluster_group'
            // is reported even for documents carrying no noop hints.
            $metadata = $baseMetadata;
            $hints = CirrusIndexField::getHint( $doc, CirrusIndexField::NOOP_HINT );
            if ( $hints !== null ) {
                $metadata['noop_hints'] = $hints;
            }
            $limiterStats = CirrusIndexField::getHint( $doc, DocumentSizeLimiter::HINT_DOC_SIZE_LIMITER_STATS );
            if ( $limiterStats !== null ) {
                $metadata['size_limiter_stats'] = $limiterStats;
            }
            $metadata['index_name'] = $connection->getIndexName(
                $indexBaseName,
                $connection->getIndexSuffixForNamespace( $doc->get( 'namespace' ) )
            );

            $result->addValue(
                [ 'query', 'pages', $pageId ],
                'cirrusbuilddoc_metadata', $metadata
            );
        }
    }

    /**
     * Reduce the revisions of the page set to a single revision per page.
     *
     * When several revisions of the same page were requested only the highest
     * revision id is kept and the 'apiwarn-cirrus-ignore-revisions' warning is
     * added to the response.
     *
     * @return array Revision id keyed by page id
     */
    private function getRevisionIDs(): array {
        $revisionByPage = [];
        $sawDuplicate = false;
        foreach ( $this->getPageSet()->getRevisionIDs() as $revId => $pageId ) {
            if ( !isset( $revisionByPage[$pageId] ) ) {
                $revisionByPage[$pageId] = $revId;
                continue;
            }
            // A second revision of an already seen page: keep the higher id.
            $sawDuplicate = true;
            if ( $revId > $revisionByPage[$pageId] ) {
                $revisionByPage[$pageId] = $revId;
            }
        }
        if ( $sawDuplicate ) {
            $this->addWarning( [ 'apiwarn-cirrus-ignore-revisions' ] );
        }
        return $revisionByPage;
    }

    /**
     * Describe the parameters accepted by this module.
     *
     *  - 'builders': multi-value list restricted to 'content' and 'links',
     *    both selected by default.
     *  - 'limiterprofile': free-form string passed to the document size
     *    limiter profile lookup in execute().
     *
     * @return array
     */
    public function getAllowedParams() {
        return [
            'builders' => [
                ParamValidator::PARAM_DEFAULT => [ 'content', 'links' ],
                ParamValidator::PARAM_ISMULTI => true,
                ParamValidator::PARAM_ALLOW_DUPLICATES => false,
                ParamValidator::PARAM_TYPE => [
                    'content',
                    'links',
                ],
                ApiBase::PARAM_HELP_MSG => 'apihelp-query+cirrusbuilddoc-param-builders',
            ],
            'limiterprofile' => [
                ParamValidator::PARAM_TYPE => 'string'
            ],
        ];
    }

    /**
     * @see ApiBase::getExamplesMessages
     * @return array Message key describing the example, keyed by example query string
     */
    protected function getExamplesMessages() {
        return [
            'action=query&prop=cirrusbuilddoc&titles=Main_Page' =>
                'apihelp-query+cirrusbuilddoc-example'
        ];
    }

}