Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 85 |
|
0.00% |
0 / 8 |
CRAP | |
0.00% |
0 / 1 |
PrewarmParsoidParserCache | |
0.00% |
0 / 82 |
|
0.00% |
0 / 8 |
306 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
2 | |||
getPageLookup | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getRevisionLookup | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getParsoidOutputAccess | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getQueryBuilder | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
parse | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
normalizeNamespace | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 48 |
|
0.00% |
0 / 1 |
110 |
1 | <?php |
2 | use MediaWiki\Page\PageIdentity; |
3 | use MediaWiki\Page\PageLookup; |
4 | use MediaWiki\Page\ParserOutputAccess; |
5 | use MediaWiki\Parser\Parsoid\ParsoidOutputAccess; |
6 | use MediaWiki\Revision\RevisionLookup; |
7 | use MediaWiki\Revision\RevisionRecord; |
8 | use MediaWiki\Revision\SlotRecord; |
9 | use Wikimedia\Rdbms\SelectQueryBuilder; |
10 | |
11 | require_once __DIR__ . '/Maintenance.php'; |
12 | |
13 | /** |
14 | * Maintenance script for populating parser cache with parsoid output. |
15 | * |
16 | * @since 1.41 |
17 | * |
18 | * @license GPL-2.0-or-later |
19 | * @author Richika Rana |
20 | */ |
class PrewarmParsoidParserCache extends Maintenance {
	/**
	 * Options passed to ParsoidOutputAccess::getParserOutput(): 0 by default, or
	 * ParserOutputAccess::OPT_FORCE_PARSE when --force is given.
	 */
	private int $forceParse = 0;

	/** Lazily fetched from the service container, see getParsoidOutputAccess(). */
	private ?ParsoidOutputAccess $parsoidOutputAccess = null;

	/** Lazily fetched from the service container, see getPageLookup(). */
	private ?PageLookup $pageLookup = null;

	/** Lazily fetched from the service container, see getRevisionLookup(). */
	private ?RevisionLookup $revisionLookup = null;

	/**
	 * Register the script description, its command line options and the
	 * default batch size.
	 */
	public function __construct() {
		parent::__construct();

		$this->addDescription(
			'Populate parser cache with parsoid output. By default, the script attempts to run ' .
			'for supported content model pages (in a specified batch if provided)'
		);
		$this->addOption(
			'force',
			'Re-parse pages even if the cached entry seems up to date',
			false,
			false
		);
		$this->addOption( 'start-from', 'Start from this page ID', false, true );
		$this->addOption( 'namespace', 'Filter pages in this namespace', false, true );
		$this->setBatchSize( 100 );
	}

	/**
	 * Get the page lookup service, fetching it from the service container
	 * on first use and reusing it afterwards.
	 *
	 * @return PageLookup
	 */
	private function getPageLookup(): PageLookup {
		$this->pageLookup ??= $this->getServiceContainer()->getPageStore();
		return $this->pageLookup;
	}

	/**
	 * Get the revision lookup service, fetching it from the service container
	 * on first use and reusing it afterwards.
	 *
	 * @return RevisionLookup
	 */
	private function getRevisionLookup(): RevisionLookup {
		$this->revisionLookup ??= $this->getServiceContainer()->getRevisionLookup();
		return $this->revisionLookup;
	}

	/**
	 * Get the ParsoidOutputAccess service, fetching it from the service container
	 * on first use and reusing it afterwards.
	 *
	 * @return ParsoidOutputAccess
	 */
	private function getParsoidOutputAccess(): ParsoidOutputAccess {
		$this->parsoidOutputAccess ??= $this->getServiceContainer()->getParsoidOutputAccess();
		return $this->parsoidOutputAccess;
	}

	/**
	 * Build the base query used for every batch: page IDs from the page
	 * table on a replica, in ascending page_id order.
	 *
	 * @return SelectQueryBuilder
	 */
	private function getQueryBuilder(): SelectQueryBuilder {
		$dbr = $this->getReplicaDB();

		return $dbr->newSelectQueryBuilder()
			->select( [ 'page_id' ] )
			->from( 'page' )
			->caller( __METHOD__ )
			->orderBy( 'page_id', SelectQueryBuilder::SORT_ASC );
	}

	/**
	 * Ask ParsoidOutputAccess for the parser output of the given revision,
	 * using anonymous parser options and the current force-parse setting.
	 *
	 * @param PageIdentity $page
	 * @param RevisionRecord $revision
	 * @return mixed Whatever ParsoidOutputAccess::getParserOutput() returns;
	 *  execute() calls isOK() on it.
	 */
	private function parse(
		PageIdentity $page,
		RevisionRecord $revision
	) {
		// Go through the getter rather than the raw property so this method
		// does not depend on some other caller having initialized it first.
		return $this->getParsoidOutputAccess()->getParserOutput(
			$page,
			ParserOptions::newFromAnon(),
			$revision,
			$this->forceParse
		);
	}

	/**
	 * NamespaceInfo::getCanonicalIndex() requires the namespace to be in lowercase,
	 * so let's do some normalization and return its canonical index.
	 *
	 * @param string $namespace The namespace string from the command line
	 * @return int|null The canonical index of the namespace, or null when the
	 *  name is not a known canonical namespace name
	 */
	private function normalizeNamespace( string $namespace ): ?int {
		return $this->getServiceContainer()->getNamespaceInfo()
			->getCanonicalIndex( strtolower( $namespace ) );
	}

	/**
	 * Populate parser cache with parsoid output.
	 *
	 * Walks the page table in ascending page_id order, one batch at a time,
	 * optionally restricted to one namespace and/or starting at a given page ID.
	 * Pages that cannot be loaded, have no latest revision, or whose main slot
	 * content model is not supported are reported and skipped.
	 *
	 * @return bool
	 */
	public function execute() {
		// We need the namespace index instead of the name to perform the query
		// on, because that's what the page table stores (in the page_namespace field).
		$namespaceIndex = null;
		$namespace = $this->getOption( 'namespace' );
		if ( $namespace !== null ) {
			$namespaceIndex = $this->normalizeNamespace( $namespace );
			if ( $namespaceIndex === null ) {
				// Bail out with a readable message instead of querying with a bogus filter.
				$this->fatalError( "Unknown namespace: \"$namespace\"" );
			}
		}

		if ( $this->hasOption( 'force' ) ) {
			// If --force is supplied, force a parse for supported pages or supported
			// pages in the specified batch.
			$this->forceParse = ParserOutputAccess::OPT_FORCE_PARSE;
		}

		// A missing --start-from casts to 0, i.e. start from the very first page.
		$startFrom = (int)$this->getOption( 'start-from' );

		// These do not change while the script runs; look them up once.
		$pageLookup = $this->getPageLookup();
		$revisionLookup = $this->getRevisionLookup();
		$parsoidOutputAccess = $this->getParsoidOutputAccess();
		$batchSize = $this->getBatchSize();

		$this->output( "\nWarming parsoid parser cache with Parsoid output...\n\n" );
		while ( true ) {
			$query = $this->getQueryBuilder();
			if ( $namespaceIndex !== null ) {
				$query = $query->where( [ 'page_namespace' => $namespaceIndex ] );
			}
			// $startFrom is always an int here, so the concatenation is safe.
			$query = $query->where( 'page_id >= ' . $startFrom )
				->limit( $batchSize );

			$result = $query->fetchResultSet();

			if ( !$result->numRows() ) {
				break;
			}

			// Informational only: the upper bound assumes contiguous page IDs.
			$currentBatch = $startFrom + ( $batchSize - 1 );
			$this->output( "\n\nBatch: $startFrom - $currentBatch\n----\n" );

			// Look through pages by pageId and populate the parserCache
			foreach ( $result as $row ) {
				$page = $pageLookup->getPageById( $row->page_id );
				// Advance the cursor first so that skipped pages are not retried
				// by the next batch.
				$startFrom = ( (int)$row->page_id + 1 );

				if ( $page === null ) {
					$this->output( "\n[Skipped] Page ID: $row->page_id not found.\n" );
					continue;
				}

				$latestRevision = $page->getLatest();
				$revision = $revisionLookup->getRevisionById( $latestRevision );
				if ( $revision === null ) {
					// The revision lookup may not find the revision; skip the page
					// rather than failing on a null revision.
					$this->output( "[Skipped] No revision found for page ID: $row->page_id.\n" );
					continue;
				}
				$mainSlot = $revision->getSlot( SlotRecord::MAIN );

				// POA will write a dummy output to PC, but we don't want that here. Just skip!
				if ( !$parsoidOutputAccess->supportsContentModel( $mainSlot->getModel() ) ) {
					$this->output(
						'[Skipped] Content model "' .
						$mainSlot->getModel() .
						"\" not supported for page ID: $row->page_id.\n"
					);
					continue;
				}

				$status = $this->parse( $page, $revision );
				if ( !$status->isOK() ) {
					$this->output(
						__METHOD__ .
						": Error parsing page ID: $row->page_id or writing to parser cache\n"
					);
					continue;
				}

				$this->output( "[Done] Page ID: $row->page_id ✔️\n" );
			}
			$this->waitForReplication();
		}

		$this->output( "\nDone pre-warming parsoid parser cache...\n" );

		return true;
	}
}
180 | |
// Name the maintenance class defined in this file, then include the file
// referenced by RUN_MAINTENANCE_IF_MAIN.
$maintClass = PrewarmParsoidParserCache::class;
require_once RUN_MAINTENANCE_IF_MAIN;