Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
82.20% |
97 / 118 |
|
28.57% |
2 / 7 |
CRAP | |
0.00% |
0 / 1 |
QueryBuildDocument | |
82.20% |
97 / 118 |
|
28.57% |
2 / 7 |
31.11 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
execute | |
86.59% |
71 / 82 |
|
0.00% |
0 / 1 |
17.70 | |||
getRevisionIDs | |
90.91% |
10 / 11 |
|
0.00% |
0 / 1 |
5.02 | |||
getAllowedParams | |
100.00% |
15 / 15 |
|
100.00% |
1 / 1 |
1 | |||
isInternal | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getExamplesMessages | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
markUnrenderable | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Api; |
4 | |
5 | use CirrusSearch\BuildDocument\BuildDocument; |
6 | use CirrusSearch\BuildDocument\DocumentSizeLimiter; |
7 | use CirrusSearch\CirrusSearch; |
8 | use CirrusSearch\Profile\SearchProfileService; |
9 | use CirrusSearch\Search\CirrusIndexField; |
10 | use CirrusSearch\SearchConfig; |
11 | use MediaWiki\Api\ApiBase; |
12 | use MediaWiki\Api\ApiQuery; |
13 | use MediaWiki\Api\ApiQueryBase; |
14 | use MediaWiki\Api\ApiResult; |
15 | use MediaWiki\MediaWikiServices; |
16 | use MediaWiki\Revision\SlotRecord; |
17 | use Wikimedia\ParamValidator\ParamValidator; |
18 | |
19 | /** |
20 | * Generate CirrusSearch document for page. |
21 | * |
22 | * This program is free software; you can redistribute it and/or modify |
23 | * it under the terms of the GNU General Public License as published by |
24 | * the Free Software Foundation; either version 2 of the License, or |
25 | * (at your option) any later version. |
26 | * |
27 | * This program is distributed in the hope that it will be useful, |
28 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
29 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
30 | * GNU General Public License for more details. |
31 | * |
32 | * You should have received a copy of the GNU General Public License along |
33 | * with this program; if not, write to the Free Software Foundation, Inc., |
34 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
35 | * http://www.gnu.org/copyleft/gpl.html |
36 | */ |
class QueryBuildDocument extends ApiQueryBase {
	use ApiTrait;

	/**
	 * @param ApiQuery $query Query module this prop module is attached to
	 * @param string $moduleName Name the module is registered under
	 */
	public function __construct( ApiQuery $query, string $moduleName ) {
		parent::__construct( $query, $moduleName, 'cb' );
	}

	/**
	 * Build the CirrusSearch document for every requested page or revision and
	 * attach it, together with its metadata, to the API result.
	 *
	 * When the request names revisions, documents are built from those
	 * revisions (at most one per page, see getRevisionIDs()); otherwise they are
	 * built from the good pages of the page set. Redirects are flagged
	 * 'unrenderable', revisions whose text is not public are flagged
	 * 'texthidden', and revisions the RevisionStore cannot find are reported
	 * under 'badrevids'.
	 *
	 * @throws \RuntimeException If the configured search engine is not CirrusSearch
	 */
	public function execute() {
		$result = $this->getResult();
		$services = MediaWikiServices::getInstance();
		$engine = $services->getSearchEngineFactory()->create();
		if ( !( $engine instanceof CirrusSearch ) ) {
			throw new \RuntimeException( 'Could not create cirrus engine' );
		}

		$builders = $this->getParameter( 'builders' );
		$profile = $this->getParameter( 'limiterprofile' );
		// Every builder that was not requested translates into a skip flag.
		$flags = 0;
		if ( !in_array( 'content', $builders, true ) ) {
			$flags |= BuildDocument::SKIP_PARSE;
		}
		if ( !in_array( 'links', $builders, true ) ) {
			$flags |= BuildDocument::SKIP_LINKS;
		}

		$pages = [];
		$wikiPageFactory = $services->getWikiPageFactory();
		$revisionStore = $services->getRevisionStore();
		$revisionBased = false;
		if ( $this->getPageSet()->getRevisionIDs() ) {
			$revisionBased = true;
			foreach ( $this->getRevisionIDs() as $pageId => $revId ) {
				$rev = $revisionStore->getRevisionById( $revId );
				if ( $rev === null ) {
					// We cannot trust ApiPageSet to properly identify missing revisions, RevisionStore
					// might not agree with it likely because they could be using different db replicas (T370770)
					$result->addValue( 'query', 'badrevids', [
						$revId => [ 'revid' => $revId, 'missing' => true ]
					] );
				} elseif ( $rev->audienceCan( $rev::DELETED_TEXT, $rev::FOR_PUBLIC ) ) {
					// Redirects are not directly represented as searchable documents.
					// They are unrenderable.
					if ( $rev->getContent( SlotRecord::MAIN )->isRedirect() ) {
						$this->markUnrenderable( $result, $pageId );
					} else {
						$pages[$pageId] = $rev;
					}
				} else {
					// While the user might have permissions, we want to limit
					// what could possibly be indexed to that which is public.
					// For an anon this would fail deeper in the system
					// anyways, this early check mostly avoids blowing up deep
					// in the bowels.
					$result->addValue(
						[ 'query', 'pages', $pageId ],
						'texthidden', true
					);
				}
			}
		} else {
			foreach ( $this->getPageSet()->getGoodPages() as $pageId => $title ) {
				$page = $wikiPageFactory->newFromTitle( $title );
				if ( $page->isRedirect() ) {
					$this->markUnrenderable( $result, $pageId );
				} else {
					$pages[$pageId] = $page;
				}
			}
		}

		$searchConfig = $engine->getConfig();
		$connection = $this->getCirrusConnection();
		$limiterProfile = $searchConfig->getProfileService()->loadProfile(
			SearchProfileService::DOCUMENT_SIZE_LIMITER,
			SearchProfileService::CONTEXT_DEFAULT,
			$profile
		);
		$builder = new BuildDocument(
			$connection,
			$this->getDB(),
			$revisionStore,
			$services->getBacklinkCacheFactory(),
			new DocumentSizeLimiter( $limiterProfile ),
			$services->getTitleFormatter(),
			$wikiPageFactory,
			$services->getTitleFactory()
		);

		// Metadata shared by every document emitted for this request.
		$baseMetadata = [];
		$clusterGroup = $searchConfig->getClusterAssignment()->getCrossClusterName();
		if ( $clusterGroup !== null ) {
			$baseMetadata['cluster_group'] = $clusterGroup;
		}

		$indexBaseName = $searchConfig->get( SearchConfig::INDEX_BASE_NAME );
		foreach ( $builder->initialize( $pages, $flags ) as $doc ) {
			// The page id stored in the document is the one used for the result path.
			$pageId = $doc->get( 'page_id' );
			$revision = $revisionBased ? $pages[$pageId] : null;
			if ( !$builder->finalize( $doc, false, $revision ) ) {
				continue;
			}

			$resultPath = [ 'query', 'pages', $pageId ];
			$result->addValue( $resultPath, 'cirrusbuilddoc', $doc->getData() );

			// Always start from the shared metadata so that cluster_group is
			// reported even for documents that carry no noop hints.
			$metadata = $baseMetadata;
			$hints = CirrusIndexField::getHint( $doc, CirrusIndexField::NOOP_HINT );
			if ( $hints !== null ) {
				$metadata['noop_hints'] = $hints;
			}
			$limiterStats = CirrusIndexField::getHint( $doc, DocumentSizeLimiter::HINT_DOC_SIZE_LIMITER_STATS );
			if ( $limiterStats !== null ) {
				$metadata['size_limiter_stats'] = $limiterStats;
			}
			$metadata['index_name'] = $connection->getIndexName(
				$indexBaseName,
				$connection->getIndexSuffixForNamespace( $doc->get( 'namespace' ) )
			);

			$result->addValue( $resultPath, 'cirrusbuilddoc_metadata', $metadata );
			$result->addValue(
				$resultPath,
				'cirrusbuilddoc_comment',
				'The CirrusDoc format is meant for internal use by CirrusSearch for debugging or queries, '
				. 'it might change at any time without notice'
			);
		}
	}

	/**
	 * Reduce the requested revisions to a single revision per page.
	 *
	 * When several revisions of the same page were requested only the highest
	 * revision id is kept and an 'apiwarn-cirrus-ignore-revisions' warning is
	 * added to the response.
	 *
	 * @return int[] Map of page id => revision id
	 */
	private function getRevisionIDs(): array {
		$result = [];
		$warning = false;
		foreach ( $this->getPageSet()->getRevisionIDs() as $revId => $pageId ) {
			if ( isset( $result[$pageId] ) ) {
				$warning = true;
				if ( $result[$pageId] >= $revId ) {
					// A revision at least as high is already recorded for this page.
					continue;
				}
			}
			$result[$pageId] = $revId;
		}
		if ( $warning ) {
			$this->addWarning( [ 'apiwarn-cirrus-ignore-revisions' ] );
		}
		return $result;
	}

	/** @inheritDoc */
	public function getAllowedParams() {
		return [
			'builders' => [
				ParamValidator::PARAM_DEFAULT => [ 'content', 'links' ],
				ParamValidator::PARAM_ISMULTI => true,
				ParamValidator::PARAM_ALLOW_DUPLICATES => false,
				ParamValidator::PARAM_TYPE => [
					'content',
					'links',
				],
				ApiBase::PARAM_HELP_MSG => 'apihelp-query+cirrusbuilddoc-param-builders',
			],
			'limiterprofile' => [
				ParamValidator::PARAM_TYPE => 'string'
			],
		];
	}

	/**
	 * Mark as internal. This isn't meant to be used by normal api users
	 * @return bool
	 */
	public function isInternal() {
		return true;
	}

	/**
	 * @see ApiBase::getExamplesMessages
	 * @return array
	 */
	protected function getExamplesMessages() {
		return [
			'action=query&prop=cirrusbuilddoc&titles=Main_Page' =>
				'apihelp-query+cirrusbuilddoc-example'
		];
	}

	/**
	 * Flag a page in the result as one no document can be built for.
	 *
	 * @param ApiResult $result Result object to write to
	 * @param int $pageId The page to mark unrenderable
	 */
	private function markUnrenderable( ApiResult $result, int $pageId ): void {
		$result->addValue(
			[ 'query', 'pages', $pageId ],
			'unrenderable', true
		);
	}

}