Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
86.27% |
88 / 102 |
|
40.00% |
2 / 5 |
CRAP | |
0.00% |
0 / 1 |
QueryBuildDocument | |
86.27% |
88 / 102 |
|
40.00% |
2 / 5 |
24.37 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
execute | |
87.32% |
62 / 71 |
|
0.00% |
0 / 1 |
15.46 | |||
getRevisionIDs | |
90.91% |
10 / 11 |
|
0.00% |
0 / 1 |
5.02 | |||
getAllowedParams | |
100.00% |
15 / 15 |
|
100.00% |
1 / 1 |
1 | |||
getExamplesMessages | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Api; |
4 | |
5 | use CirrusSearch\BuildDocument\BuildDocument; |
6 | use CirrusSearch\BuildDocument\DocumentSizeLimiter; |
7 | use CirrusSearch\CirrusSearch; |
8 | use CirrusSearch\Profile\SearchProfileService; |
9 | use CirrusSearch\Search\CirrusIndexField; |
10 | use CirrusSearch\SearchConfig; |
11 | use MediaWiki\Api\ApiBase; |
12 | use MediaWiki\Api\ApiQuery; |
13 | use MediaWiki\Api\ApiQueryBase; |
14 | use MediaWiki\MediaWikiServices; |
15 | use Wikimedia\ParamValidator\ParamValidator; |
16 | |
17 | /** |
18 | * Generate CirrusSearch document for page. |
19 | * |
20 | * This program is free software; you can redistribute it and/or modify |
21 | * it under the terms of the GNU General Public License as published by |
22 | * the Free Software Foundation; either version 2 of the License, or |
23 | * (at your option) any later version. |
24 | * |
25 | * This program is distributed in the hope that it will be useful, |
26 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
27 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
28 | * GNU General Public License for more details. |
29 | * |
30 | * You should have received a copy of the GNU General Public License along |
31 | * with this program; if not, write to the Free Software Foundation, Inc., |
32 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
33 | * http://www.gnu.org/copyleft/gpl.html |
34 | */ |
class QueryBuildDocument extends ApiQueryBase {
	use ApiTrait;

	/**
	 * @param ApiQuery $query Query module this prop module is attached to
	 * @param string $moduleName Name under which this module is registered
	 */
	public function __construct( ApiQuery $query, string $moduleName ) {
		// 'cb' is the prefix applied to this module's request parameters.
		parent::__construct( $query, $moduleName, 'cb' );
	}

	/**
	 * Build the CirrusSearch document for every requested page or revision
	 * and attach it to the API result.
	 *
	 * For each document that finalizes successfully the result receives,
	 * under query.pages.<pageId>:
	 *  - cirrusbuilddoc: the document data
	 *  - cirrusbuilddoc_metadata: index_name, plus cluster_group, noop_hints
	 *    and size_limiter_stats when they are available
	 *
	 * Revisions that the RevisionStore cannot load are reported under
	 * query.badrevids, and revisions whose text is not public are flagged
	 * with texthidden instead of being built.
	 *
	 * @throws \RuntimeException When the configured search engine is not CirrusSearch
	 */
	public function execute() {
		$result = $this->getResult();
		$services = MediaWikiServices::getInstance();
		$engine = $services->getSearchEngineFactory()->create();
		if ( !( $engine instanceof CirrusSearch ) ) {
			throw new \RuntimeException( 'Could not create cirrus engine' );
		}

		$builders = $this->getParameter( 'builders' );
		$profile = $this->getParameter( 'limiterprofile' );

		// Every builder that was not requested translates into a SKIP_* flag.
		$flags = 0;
		if ( !in_array( 'content', $builders, true ) ) {
			$flags |= BuildDocument::SKIP_PARSE;
		}
		if ( !in_array( 'links', $builders, true ) ) {
			$flags |= BuildDocument::SKIP_LINKS;
		}

		$wikiPageFactory = $services->getWikiPageFactory();
		$revisionStore = $services->getRevisionStore();

		// Maps page ID => RevisionRecord when revisions were requested,
		// page ID => WikiPage otherwise.
		$pages = [];
		$revisionBased = false;
		if ( $this->getPageSet()->getRevisionIDs() ) {
			$revisionBased = true;
			foreach ( $this->getRevisionIDs() as $pageId => $revId ) {
				$rev = $revisionStore->getRevisionById( $revId );
				if ( $rev === null ) {
					// We cannot trust ApiPageSet to properly identify missing revisions, RevisionStore
					// might not agree with it likely because they could be using different db replicas (T370770)
					$result->addValue( 'query', 'badrevids', [
						$revId => [ 'revid' => $revId, 'missing' => true ]
					] );
				} elseif ( $rev->audienceCan( $rev::DELETED_TEXT, $rev::FOR_PUBLIC ) ) {
					$pages[$pageId] = $rev;
				} else {
					// While the user might have permissions, we want to limit
					// what could possibly be indexed to that which is public.
					// For an anon this would fail deeper in the system
					// anyways, this early check mostly avoids blowing up deep
					// in the bowels.
					$result->addValue(
						[ 'query', 'pages', $pageId ],
						'texthidden', true
					);
				}
			}
		} else {
			foreach ( $this->getPageSet()->getGoodPages() as $pageId => $title ) {
				$pages[$pageId] = $wikiPageFactory->newFromTitle( $title );
			}
		}

		$searchConfig = $engine->getConfig();
		$connection = $this->getCirrusConnection();
		$limiterProfile = $searchConfig->getProfileService()->loadProfile(
			SearchProfileService::DOCUMENT_SIZE_LIMITER,
			SearchProfileService::CONTEXT_DEFAULT,
			$profile
		);
		$builder = new BuildDocument(
			$connection,
			$this->getDB(),
			$revisionStore,
			$services->getBacklinkCacheFactory(),
			new DocumentSizeLimiter( $limiterProfile ),
			$services->getTitleFormatter(),
			$wikiPageFactory,
			$services->getTitleFactory()
		);

		// Metadata shared by every document of this request.
		$baseMetadata = [];
		$clusterGroup = $searchConfig->getClusterAssignment()->getCrossClusterName();
		if ( $clusterGroup !== null ) {
			$baseMetadata['cluster_group'] = $clusterGroup;
		}
		$indexBaseName = $searchConfig->get( SearchConfig::INDEX_BASE_NAME );

		// The keys of the initialized documents are not used: the page ID is
		// always read from the document itself.
		foreach ( $builder->initialize( $pages, $flags ) as $doc ) {
			$pageId = $doc->get( 'page_id' );
			$revision = $revisionBased ? $pages[$pageId] : null;
			if ( !$builder->finalize( $doc, false, $revision ) ) {
				continue;
			}

			$result->addValue(
				[ 'query', 'pages', $pageId ],
				'cirrusbuilddoc', $doc->getData()
			);

			// Start from the shared metadata so that cluster_group is reported
			// even when the document carries no noop hints.
			$metadata = $baseMetadata;
			$hints = CirrusIndexField::getHint( $doc, CirrusIndexField::NOOP_HINT );
			if ( $hints !== null ) {
				$metadata['noop_hints'] = $hints;
			}
			$limiterStats = CirrusIndexField::getHint( $doc, DocumentSizeLimiter::HINT_DOC_SIZE_LIMITER_STATS );
			if ( $limiterStats !== null ) {
				$metadata['size_limiter_stats'] = $limiterStats;
			}
			$metadata['index_name'] = $connection->getIndexName(
				$indexBaseName,
				$connection->getIndexSuffixForNamespace( $doc->get( 'namespace' ) )
			);

			$result->addValue(
				[ 'query', 'pages', $pageId ],
				'cirrusbuilddoc_metadata', $metadata
			);
		}
	}

	/**
	 * Reduce the requested revisions to a single revision per page.
	 *
	 * When several revisions of the same page were requested only the one
	 * with the highest revision ID is kept and the
	 * apiwarn-cirrus-ignore-revisions warning is added to the response.
	 *
	 * @return int[] Map of page ID => revision ID
	 */
	private function getRevisionIDs(): array {
		$revisionByPage = [];
		$hasDuplicates = false;
		foreach ( $this->getPageSet()->getRevisionIDs() as $revId => $pageId ) {
			if ( isset( $revisionByPage[$pageId] ) ) {
				$hasDuplicates = true;
				if ( $revisionByPage[$pageId] >= $revId ) {
					// The revision already kept for this page is at least as high.
					continue;
				}
			}
			$revisionByPage[$pageId] = $revId;
		}
		if ( $hasDuplicates ) {
			$this->addWarning( [ 'apiwarn-cirrus-ignore-revisions' ] );
		}
		return $revisionByPage;
	}

	/**
	 * Describe the parameters accepted by this module.
	 *
	 * - builders: which document builders to run ('content', 'links'); both by default
	 * - limiterprofile: optional name of the document size limiter profile to load
	 *
	 * @see ApiBase::getAllowedParams
	 * @return array
	 */
	public function getAllowedParams() {
		return [
			'builders' => [
				ParamValidator::PARAM_DEFAULT => [ 'content', 'links' ],
				ParamValidator::PARAM_ISMULTI => true,
				ParamValidator::PARAM_ALLOW_DUPLICATES => false,
				ParamValidator::PARAM_TYPE => [
					'content',
					'links',
				],
				ApiBase::PARAM_HELP_MSG => 'apihelp-query+cirrusbuilddoc-param-builders',
			],
			'limiterprofile' => [
				ParamValidator::PARAM_TYPE => 'string'
			],
		];
	}

	/**
	 * @see ApiBase::getExamplesMessages
	 * @return array
	 */
	protected function getExamplesMessages() {
		return [
			'action=query&prop=cirrusbuilddoc&titles=Main_Page' =>
				'apihelp-query+cirrusbuilddoc-example'
		];
	}

}