Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
86.14% covered (warning)
86.14%
87 / 101
40.00% covered (danger)
40.00%
2 / 5
CRAP
0.00% covered (danger)
0.00%
0 / 1
QueryBuildDocument
86.14% covered (warning)
86.14%
87 / 101
40.00% covered (danger)
40.00%
2 / 5
24.41
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 execute
87.14% covered (warning)
87.14%
61 / 70
0.00% covered (danger)
0.00%
0 / 1
15.48
 getRevisionIDs
90.91% covered (success)
90.91%
10 / 11
0.00% covered (danger)
0.00%
0 / 1
5.02
 getAllowedParams
100.00% covered (success)
100.00%
15 / 15
100.00% covered (success)
100.00%
1 / 1
1
 getExamplesMessages
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2
3namespace CirrusSearch\Api;
4
5use ApiBase;
6use CirrusSearch\BuildDocument\BuildDocument;
7use CirrusSearch\BuildDocument\DocumentSizeLimiter;
8use CirrusSearch\CirrusSearch;
9use CirrusSearch\Profile\SearchProfileService;
10use CirrusSearch\Search\CirrusIndexField;
11use CirrusSearch\SearchConfig;
12use MediaWiki\MediaWikiServices;
13use Wikimedia\ParamValidator\ParamValidator;
14
/**
 * Generate CirrusSearch document for page.
 *
 * Query API module (parameter prefix "cb") that builds the CirrusSearch
 * document for every requested page, or for the requested revisions when
 * revision IDs are supplied, and attaches the document and its indexing
 * metadata to the corresponding entry under query.pages in the API result.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 */
class QueryBuildDocument extends \ApiQueryBase {
    use ApiTrait;

    /**
     * @param \ApiQuery $query Query module this prop module is attached to
     * @param string $moduleName Name under which this module is registered
     */
    public function __construct( \ApiQuery $query, $moduleName ) {
        parent::__construct( $query, $moduleName, 'cb' );
    }

    /**
     * Build the search document for every requested page or revision and add
     * it to the API result.
     *
     * For each successfully finalized document two values are added under
     * [ 'query', 'pages', <page id> ]:
     *  - 'cirrusbuilddoc': the document data
     *  - 'cirrusbuilddoc_metadata': 'index_name', plus 'cluster_group',
     *    'noop_hints' and 'size_limiter_stats' when they are available
     *
     * Revisions that cannot be loaded are reported under query.badrevids and
     * revisions whose text is not public are flagged with 'texthidden'.
     *
     * @throws \RuntimeException when the configured search engine is not CirrusSearch
     */
    public function execute() {
        $result = $this->getResult();
        $services = MediaWikiServices::getInstance();
        $engine = $services->getSearchEngineFactory()->create();
        if ( !( $engine instanceof CirrusSearch ) ) {
            throw new \RuntimeException( 'Could not create cirrus engine' );
        }

        $builders = $this->getParameter( 'builders' );
        $profile = $this->getParameter( 'limiterprofile' );
        // Every builder that was not requested is turned into a skip flag.
        $flags = 0;
        if ( !in_array( 'content', $builders, true ) ) {
            $flags |= BuildDocument::SKIP_PARSE;
        }
        if ( !in_array( 'links', $builders, true ) ) {
            $flags |= BuildDocument::SKIP_LINKS;
        }

        $pages = [];
        $wikiPageFactory = $services->getWikiPageFactory();
        $revisionStore = $services->getRevisionStore();
        $revisionBased = false;
        if ( $this->getPageSet()->getRevisionIDs() ) {
            $revisionBased = true;
            foreach ( $this->getRevisionIDs() as $pageId => $revId ) {
                $rev = $revisionStore->getRevisionById( $revId );
                if ( $rev === null ) {
                    // We cannot trust ApiPageSet to properly identify missing revisions, RevisionStore
                    // might not agree with it likely because they could be using different db replicas (T370770)
                    $result->addValue( 'query', 'badrevids', [
                        $revId => [ 'revid' => $revId, 'missing' => true ]
                    ] );
                } elseif ( $rev->audienceCan( $rev::DELETED_TEXT, $rev::FOR_PUBLIC ) ) {
                    $pages[$pageId] = $rev;
                } else {
                    // While the user might have permissions, we want to limit
                    // what could possibly be indexed to that which is public.
                    // For an anon this would fail deeper in the system
                    // anyways, this early check mostly avoids blowing up deep
                    // in the bowels.
                    $result->addValue(
                        [ 'query', 'pages', $pageId ],
                        'texthidden', true
                    );
                }
            }
        } else {
            foreach ( $this->getPageSet()->getGoodPages() as $pageId => $title ) {
                $pages[$pageId] = $wikiPageFactory->newFromTitle( $title );
            }
        }

        $searchConfig = $engine->getConfig();
        $connection = $this->getCirrusConnection();
        $limiterProfile = $searchConfig->getProfileService()->loadProfile(
            SearchProfileService::DOCUMENT_SIZE_LIMITER,
            SearchProfileService::CONTEXT_DEFAULT,
            $profile
        );
        $builder = new BuildDocument(
            $connection,
            $this->getDB(),
            $revisionStore,
            $services->getBacklinkCacheFactory(),
            new DocumentSizeLimiter( $limiterProfile ),
            $services->getTitleFormatter(),
            $wikiPageFactory
        );

        // Metadata shared by every document of this request.
        $baseMetadata = [];
        $clusterGroup = $searchConfig->getClusterAssignment()->getCrossClusterName();
        if ( $clusterGroup !== null ) {
            $baseMetadata['cluster_group'] = $clusterGroup;
        }
        $indexBaseName = $searchConfig->get( SearchConfig::INDEX_BASE_NAME );

        foreach ( $builder->initialize( $pages, $flags ) as $doc ) {
            // The page id stored in the document is what keys both $pages and the result.
            $pageId = $doc->get( 'page_id' );
            $revision = $revisionBased ? $pages[$pageId] : null;
            if ( !$builder->finalize( $doc, false, $revision ) ) {
                continue;
            }

            $result->addValue(
                [ 'query', 'pages', $pageId ],
                'cirrusbuilddoc', $doc->getData()
            );

            // Always start from the shared metadata so that cluster_group is
            // reported even for a document that carries no noop hints.
            $metadata = $baseMetadata;
            $hints = CirrusIndexField::getHint( $doc, CirrusIndexField::NOOP_HINT );
            if ( $hints !== null ) {
                $metadata['noop_hints'] = $hints;
            }
            $limiterStats = CirrusIndexField::getHint( $doc, DocumentSizeLimiter::HINT_DOC_SIZE_LIMITER_STATS );
            if ( $limiterStats !== null ) {
                $metadata['size_limiter_stats'] = $limiterStats;
            }
            $metadata['index_name'] = $connection->getIndexName(
                $indexBaseName,
                $connection->getIndexSuffixForNamespace( $doc->get( 'namespace' ) )
            );

            $result->addValue(
                [ 'query', 'pages', $pageId ],
                'cirrusbuilddoc_metadata', $metadata
            );
        }
    }

    /**
     * Reduce the requested revisions to a single revision per page.
     *
     * The page set maps revision id => page id; this inverts it to
     * page id => revision id, keeping the highest revision id when several
     * revisions of the same page were requested. In that case the
     * 'apiwarn-cirrus-ignore-revisions' warning is added once.
     *
     * @return int[] Map of page id => revision id
     */
    private function getRevisionIDs(): array {
        $result = [];
        $warning = false;
        foreach ( $this->getPageSet()->getRevisionIDs() as $revId => $pageId ) {
            if ( isset( $result[$pageId] ) ) {
                $warning = true;
                if ( $result[$pageId] >= $revId ) {
                    // The revision already kept for this page is at least as high.
                    continue;
                }
            }
            $result[$pageId] = $revId;
        }
        if ( $warning ) {
            $this->addWarning( [ 'apiwarn-cirrus-ignore-revisions' ] );
        }
        return $result;
    }

    /**
     * Describe the parameters accepted by this module.
     *
     *  - builders: multi-value, any of 'content' and 'links' (both by default)
     *  - limiterprofile: string passed as the profile name when loading the
     *    document size limiter profile
     *
     * @return array
     */
    public function getAllowedParams() {
        return [
            'builders' => [
                ParamValidator::PARAM_DEFAULT => [ 'content', 'links' ],
                ParamValidator::PARAM_ISMULTI => true,
                ParamValidator::PARAM_ALLOW_DUPLICATES => false,
                ParamValidator::PARAM_TYPE => [
                    'content',
                    'links',
                ],
                ApiBase::PARAM_HELP_MSG => 'apihelp-query+cirrusbuilddoc-param-builders',
            ],
            'limiterprofile' => [
                ParamValidator::PARAM_TYPE => 'string'
            ],
        ];
    }

    /**
     * @see ApiBase::getExamplesMessages
     * @return array Map of example query string => help message key
     */
    protected function getExamplesMessages() {
        return [
            'action=query&prop=cirrusbuilddoc&titles=Main_Page' =>
                'apihelp-query+cirrusbuilddoc-example'
        ];
    }

}