Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 90 |
|
0.00% |
0 / 9 |
CRAP | |
0.00% |
0 / 1 |
PrewarmParsoidParserCache | |
0.00% |
0 / 90 |
|
0.00% |
0 / 9 |
420 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
2 | |||
getPageLookup | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getRevisionLookup | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getParserOutputAccess | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getParsoidSiteConfig | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getQueryBuilder | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
parse | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
12 | |||
normalizeNamespace | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 48 |
|
0.00% |
0 / 1 |
110 |
1 | <?php |
2 | use MediaWiki\Maintenance\Maintenance; |
3 | use MediaWiki\Page\PageLookup; |
4 | use MediaWiki\Page\PageRecord; |
5 | use MediaWiki\Page\ParserOutputAccess; |
6 | use MediaWiki\Parser\ParserOptions; |
7 | use MediaWiki\Parser\Parsoid\Config\SiteConfig as ParsoidSiteConfig; |
8 | use MediaWiki\Revision\RevisionLookup; |
9 | use MediaWiki\Revision\RevisionRecord; |
10 | use MediaWiki\Revision\SlotRecord; |
11 | use MediaWiki\Status\Status; |
12 | use Wikimedia\Parsoid\Core\ClientError; |
13 | use Wikimedia\Parsoid\Core\ResourceLimitExceededException; |
14 | use Wikimedia\Rdbms\SelectQueryBuilder; |
15 | |
16 | // @codeCoverageIgnoreStart |
17 | require_once __DIR__ . '/Maintenance.php'; |
18 | // @codeCoverageIgnoreEnd |
19 | |
20 | /** |
21 | * Maintenance script for populating parser cache with parsoid output. |
22 | * |
23 | * @since 1.41 |
24 | * |
25 | * @license GPL-2.0-or-later |
26 | * @author Richika Rana |
27 | */ |
class PrewarmParsoidParserCache extends Maintenance {
	/** @var int Option flags handed to ParserOutputAccess::getParserOutput() */
	private int $forceParse = 0;
	private ParserOutputAccess $parserOutputAccess;
	private PageLookup $pageLookup;
	private RevisionLookup $revisionLookup;
	private ParsoidSiteConfig $parsoidSiteConfig;

	/**
	 * Register the script description, its command line options and the
	 * default batch size (100).
	 */
	public function __construct() {
		parent::__construct();

		// Note the leading space on the second fragment: without it the two
		// halves are glued together as "runfor".
		$this->addDescription(
			'Populate parser cache with parsoid output. By default, script attempt to run' .
			' for supported content model pages (in a specified batch if provided)'
		);
		$this->addOption(
			'force',
			'Re-parse pages even if the cached entry seems up to date',
			false,
			false
		);
		$this->addOption( 'start-from', 'Start from this page ID', false, true );
		$this->addOption( 'namespace', 'Filter pages in this namespace', false, true );
		$this->setBatchSize( 100 );
	}

	/**
	 * Get the page lookup service, fetching it from the service container
	 * on first use only.
	 *
	 * @return PageLookup
	 */
	private function getPageLookup(): PageLookup {
		$this->pageLookup ??= $this->getServiceContainer()->getPageStore();
		return $this->pageLookup;
	}

	/**
	 * Get the revision lookup service, fetching it from the service container
	 * on first use only.
	 *
	 * @return RevisionLookup
	 */
	private function getRevisionLookup(): RevisionLookup {
		$this->revisionLookup ??= $this->getServiceContainer()->getRevisionLookup();
		return $this->revisionLookup;
	}

	/**
	 * Get the ParserOutputAccess service, fetching it from the service
	 * container on first use only.
	 *
	 * @return ParserOutputAccess
	 */
	private function getParserOutputAccess(): ParserOutputAccess {
		$this->parserOutputAccess ??= $this->getServiceContainer()->getParserOutputAccess();
		return $this->parserOutputAccess;
	}

	/**
	 * Get the Parsoid site configuration, fetching it from the service
	 * container on first use only.
	 *
	 * @return ParsoidSiteConfig
	 */
	private function getParsoidSiteConfig(): ParsoidSiteConfig {
		$this->parsoidSiteConfig ??= $this->getServiceContainer()->getParsoidSiteConfig();
		return $this->parsoidSiteConfig;
	}

	/**
	 * Build the base query selecting page IDs from the page table in
	 * ascending order. Callers add their own conditions and limit.
	 *
	 * @return SelectQueryBuilder
	 */
	private function getQueryBuilder(): SelectQueryBuilder {
		$dbr = $this->getReplicaDB();

		return $dbr->newSelectQueryBuilder()
			->select( [ 'page_id' ] )
			->from( 'page' )
			->caller( __METHOD__ )
			->orderBy( 'page_id', SelectQueryBuilder::SORT_ASC );
	}

	/**
	 * Request Parsoid parser output for one revision of a page through
	 * ParserOutputAccess, using anonymous parser options.
	 *
	 * Parsoid client errors and resource limit failures are converted into a
	 * fatal Status instead of propagating, so one bad page does not abort
	 * the whole run.
	 *
	 * @param PageRecord $page
	 * @param RevisionRecord $revision
	 * @return Status
	 */
	private function parse(
		PageRecord $page,
		RevisionRecord $revision
	): Status {
		$popts = ParserOptions::newFromAnon();
		$popts->setUseParsoid();
		try {
			return $this->getParserOutputAccess()->getParserOutput(
				$page,
				$popts,
				$revision,
				$this->forceParse
			);
		} catch ( ClientError $e ) {
			return Status::newFatal( 'parsoid-client-error', $e->getMessage() );
		} catch ( ResourceLimitExceededException $e ) {
			return Status::newFatal( 'parsoid-resource-limit-exceeded', $e->getMessage() );
		}
	}

	/**
	 * NamespaceInfo::getCanonicalIndex() requires the namespace to be in lowercase,
	 * so let's do some normalization and return its canonical index.
	 *
	 * The script is aborted with an error message when the name does not
	 * resolve to a namespace, rather than failing on the int return type.
	 *
	 * @param string $namespace The namespace string from the command line
	 * @return int The canonical index of the namespace
	 */
	private function normalizeNamespace( string $namespace ): int {
		$index = $this->getServiceContainer()->getNamespaceInfo()
			->getCanonicalIndex( strtolower( $namespace ) );

		if ( $index === null ) {
			$this->fatalError( "Unknown namespace: \"$namespace\"\n" );
		}

		return $index;
	}

	/**
	 * Populate parser cache with parsoid output.
	 *
	 * Walks the page table in ascending page_id order, one batch at a time,
	 * optionally restricted to a single namespace and starting from a given
	 * page ID. Pages that cannot be loaded, have no loadable latest revision
	 * or use a content model Parsoid does not support are skipped.
	 *
	 * @return bool
	 */
	public function execute() {
		$startFrom = (int)$this->getOption( 'start-from' );

		// We need the namespace index instead of the name to perform the query
		// on, because that's what the page table stores (in the page_namespace field).
		$namespaceIndex = null;
		$namespace = $this->getOption( 'namespace' );
		if ( $namespace !== null ) {
			$namespaceIndex = $this->normalizeNamespace( $namespace );
		}

		if ( $this->hasOption( 'force' ) ) {
			// If --force is supplied, force a parse for supported pages or supported
			// pages in the specified batch.
			$this->forceParse = ParserOutputAccess::OPT_FORCE_PARSE;
		}

		$batchSize = $this->getBatchSize();

		$this->output( "\nWarming parsoid parser cache with Parsoid output...\n\n" );
		while ( true ) {
			$query = $this->getQueryBuilder();
			if ( $namespaceIndex !== null ) {
				$query = $query->where( [ 'page_namespace' => $namespaceIndex ] );
			}
			$query = $query->where( $this->getReplicaDB()->expr( 'page_id', '>=', $startFrom ) )
				->limit( $batchSize );

			$result = $query->fetchResultSet();

			if ( !$result->numRows() ) {
				break;
			}

			$currentBatch = $startFrom + ( $batchSize - 1 );
			$this->output( "\n\nBatch: $startFrom - $currentBatch\n----\n" );

			// Look through pages by pageId and populate the parserCache
			foreach ( $result as $row ) {
				$page = $this->getPageLookup()->getPageById( $row->page_id );
				// Advance the cursor first so that skipped pages are not
				// selected again by the next batch.
				$startFrom = ( (int)$row->page_id + 1 );

				if ( $page === null ) {
					$this->output( "\n[Skipped] Page ID: $row->page_id not found.\n" );
					continue;
				}

				$latestRevision = $page->getLatest();
				$revision = $this->getRevisionLookup()->getRevisionById( $latestRevision );
				if ( $revision === null ) {
					// The lookup may come back empty; skip the page instead of
					// failing on a null revision below.
					$this->output(
						"[Skipped] Revision ID: $latestRevision not found for page ID: $row->page_id.\n"
					);
					continue;
				}
				$mainSlot = $revision->getSlot( SlotRecord::MAIN );

				// POA will write a dummy output to PC, but we don't want that here. Just skip!
				if ( !$this->getParsoidSiteConfig()->supportsContentModel( $mainSlot->getModel() ) ) {
					$this->output(
						'[Skipped] Content model "' .
						$mainSlot->getModel() .
						"\" not supported for page ID: $row->page_id.\n"
					);
					continue;
				}

				$status = $this->parse( $page, $revision );
				if ( !$status->isOK() ) {
					$this->output(
						__METHOD__ .
						": Error parsing page ID: $row->page_id or writing to parser cache\n"
					);
					continue;
				}

				$this->output( "[Done] Page ID: $row->page_id ✔️\n" );
			}
			// Wait for replication once per batch before querying again.
			$this->waitForReplication();
		}

		$this->output( "\nDone pre-warming parsoid parser cache...\n" );

		return true;
	}
}
201 | |
202 | // @codeCoverageIgnoreStart |
// Entry point: record this script's class name in $maintClass, then include
// the file named by RUN_MAINTENANCE_IF_MAIN (presumably the maintenance
// runner that instantiates and executes that class — confirm in Maintenance.php).
203 | $maintClass = PrewarmParsoidParserCache::class; |
204 | require_once RUN_MAINTENANCE_IF_MAIN; |
205 | // @codeCoverageIgnoreEnd |