MediaWiki 1.40.4
PageEditStash.php
Go to the documentation of this file.
1<?php
21namespace MediaWiki\Storage;
22
23use BagOStuff;
24use Content;
25use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
36use ParserOutput;
37use Psr\Log\LoggerInterface;
38use stdClass;
40use Wikimedia\ScopedCallback;
41use WikiPage;
42
/** @var BagOStuff Cache holding the stashed edit data, stashed input text and per-user recent key lists */
56 private $cache;
/** @var ILoadBalancer Source of the database connections used for locking and for the recent changes lookup */
58 private $lb;
/** @var LoggerInterface */
60 private $logger;
/** @var StatsdDataFactoryInterface Receives the "editstash.*" counters and timings */
62 private $stats;
/** @var HookRunner Built from the HookContainer passed to the constructor */
64 private $hookRunner;
/** @var UserEditTracker Supplies user edit counts for the staleness check */
66 private $userEditTracker;
/** @var UserFactory Converts a UserIdentity into a legacy user object */
68 private $userFactory;
/** @var WikiPageFactory Creates the legacy page object handed to the stash hook */
70 private $wikiPageFactory;
/** @var int Value passed to the constructor; compared against self::INITIATOR_USER in checkCache() */
72 private $initiator;
73
// Status codes returned by parseAndCache()
74 public const ERROR_NONE = 'stashed';
75 public const ERROR_PARSE = 'error_parse';
76 public const ERROR_CACHE = 'error_cache';
// NOTE(review): presumably meant for output that storeStashValue() reports as 'uncacheable' - confirm it is returned
77 public const ERROR_UNCACHEABLE = 'uncacheable';
78 public const ERROR_BUSY = 'busy';
79
// Stash entries no older than this many seconds are reused without further staleness checks
80 public const PRESUME_FRESH_TTL_SEC = 30;
// Upper bound, in seconds, for the TTL of stashed values
81 public const MAX_CACHE_TTL = 300; // 5 minutes
// Upper bound, in seconds, for the TTL of output flagged with ParserOutputFlags::USER_SIGNATURE
82 public const MAX_SIGNATURE_TTL = 60;
83
// Length of the per-user recent key list at which the oldest stashed entry is deleted
84 private const MAX_CACHE_RECENT = 2;
85
// Possible values of $initiator; checkCache() only consults the stash for INITIATOR_USER
86 public const INITIATOR_USER = 1;
87 public const INITIATOR_JOB_OR_CLI = 2;
88
/**
 * Wire up the collaborators used for stashing and retrieving pre-parsed edits.
 *
 * @param BagOStuff $cache Cache that stores the stashed values
 * @param ILoadBalancer $lb Provides the database connections
 * @param LoggerInterface $logger
 * @param StatsdDataFactoryInterface $stats
 * @param UserEditTracker $userEditTracker
 * @param UserFactory $userFactory
 * @param WikiPageFactory $wikiPageFactory
 * @param HookContainer $hookContainer Wrapped in a HookRunner
 * @param int $initiator One of the self::INITIATOR_* constants
 */
public function __construct(
	BagOStuff $cache,
	ILoadBalancer $lb,
	LoggerInterface $logger,
	StatsdDataFactoryInterface $stats,
	UserEditTracker $userEditTracker,
	UserFactory $userFactory,
	WikiPageFactory $wikiPageFactory,
	HookContainer $hookContainer,
	$initiator
) {
	// Storage back-ends
	$this->cache = $cache;
	$this->lb = $lb;

	// Diagnostics
	$this->logger = $logger;
	$this->stats = $stats;

	// User and page services
	$this->userEditTracker = $userEditTracker;
	$this->userFactory = $userFactory;
	$this->wikiPageFactory = $wikiPageFactory;

	// Hooks are always invoked through a runner built from the container
	$this->hookRunner = new HookRunner( $hookContainer );

	// Who triggered this request (see the INITIATOR_* constants)
	$this->initiator = $initiator;
}
121
/**
 * Parse the given content ahead of the actual save and stash the result in the cache.
 *
 * A fresh stash entry for the same page/content/user (at most PRESUME_FRESH_TTL_SEC old)
 * is reused instead of parsing again. The ParserOutputStashForEdit hook is run whenever
 * parser output is available.
 *
 * @param PageUpdater|WikiPage $pageUpdater Page updater, or a WikiPage from which a page
 *   updater is created via newPageUpdater()
 * @param Content $content Content of the edit
 * @param UserIdentity $user User performing the edit
 * @param string $summary Edit summary, passed on to the hook
 * @return string One of the self::ERROR_* constants (self::ERROR_NONE on success)
 */
public function parseAndCache( $pageUpdater, Content $content, UserIdentity $user, string $summary ) {
	$logger = $this->logger;

	if ( $pageUpdater instanceof WikiPage ) {
		// TODO: Trigger deprecation warning once extensions have been fixed.
		// Or better, create PageUpdater::prepareAndStash and deprecate this method.
		$pageUpdater = $pageUpdater->newPageUpdater( $user );
	}

	$page = $pageUpdater->getPage();
	$key = $this->getStashKey( $page, $this->getContentHash( $content ), $user );
	$fname = __METHOD__;

	// Use the primary DB to allow for fast blocking locks on the "save path" where this
	// value might actually be used to complete a page edit. If the edit submission request
	// happens before this edit stash requests finishes, then the submission will block until
	// the stash request finishes parsing. For the lock acquisition below, there is not much
	// need to duplicate parsing of the same content/user/summary bundle, so try to avoid
	// blocking at all here.
	$dbw = $this->lb->getConnectionRef( DB_PRIMARY );
	if ( !$dbw->lock( $key, $fname, 0 ) ) {
		// De-duplicate requests on the same key
		return self::ERROR_BUSY;
	}
	// Never read directly: the ScopedCallback is expected to run the unlock callback
	// once $unlocker goes out of scope, i.e. on every return path below.
	$unlocker = new ScopedCallback( static function () use ( $dbw, $key, $fname ) {
		$dbw->unlock( $key, $fname );
	} );

	$cutoffTime = time() - self::PRESUME_FRESH_TTL_SEC;

	// Reuse any freshly built matching edit stash cache
	$editInfo = $this->getStashValue( $key );
	if ( $editInfo && (int)wfTimestamp( TS_UNIX, $editInfo->timestamp ) >= $cutoffTime ) {
		$alreadyCached = true;
	} else {
		$pageUpdater->setContent( SlotRecord::MAIN, $content );

		$update = $pageUpdater->prepareUpdate( EDIT_INTERNAL ); // applies pre-save transform
		$output = $update->getCanonicalParserOutput(); // causes content to be parsed
		$output->setCacheTime( $update->getRevision()->getTimestamp() );

		// emulate a cache value that kind of looks like a PreparedEdit, for use below
		$editInfo = (object)[
			'pstContent' => $update->getRawContent( SlotRecord::MAIN ),
			'output' => $output,
			'timestamp' => $output->getCacheTime()
		];

		$alreadyCached = false;
	}

	$logContext = [ 'cachekey' => $key, 'title' => (string)$page ];

	if ( !$editInfo->output ) {
		// No parser output could be produced for this content
		return self::ERROR_PARSE;
	}

	// Let extensions add ParserOutput metadata or warm other caches
	$legacyUser = $this->userFactory->newFromUserIdentity( $user );
	$legacyPage = $this->wikiPageFactory->newFromTitle( $page );
	$this->hookRunner->onParserOutputStashForEdit(
		$legacyPage, $content, $editInfo->output, $summary, $legacyUser );

	if ( $alreadyCached ) {
		$logger->debug( "Parser output for key '{cachekey}' already cached.", $logContext );

		return self::ERROR_NONE;
	}

	$code = $this->storeStashValue(
		$key,
		$editInfo->pstContent,
		$editInfo->output,
		$editInfo->timestamp,
		$user
	);

	if ( $code === true ) {
		$logger->debug( "Cached parser output for key '{cachekey}'.", $logContext );

		return self::ERROR_NONE;
	}

	if ( $code === 'uncacheable' ) {
		$logger->info(
			"Uncacheable parser output for key '{cachekey}' [{code}].",
			$logContext + [ 'code' => $code ]
		);

		// Report the real reason; without this return the caller would be told that
		// parsing failed (ERROR_PARSE) even though the output was merely not cacheable.
		return self::ERROR_UNCACHEABLE;
	}

	$logger->error(
		"Failed to cache parser output for key '{cachekey}'.",
		$logContext + [ 'code' => $code ]
	);

	return self::ERROR_CACHE;
}
227
/**
 * Look up the stashed edit for this page/content/user and decide whether it is still usable.
 *
 * The stash is only consulted for HTTP POST requests initiated by a user that is not a bot.
 * An entry at most PRESUME_FRESH_TTL_SEC old is accepted as is. An older entry is accepted
 * only if the user made no edits in the meantime: for unregistered users this is judged from
 * the time of their latest recent change, for registered users from their edit count.
 *
 * @param PageIdentity $page
 * @param Content $content Content of the edit being saved
 * @param UserIdentity $user User performing the edit
 * @return stdClass|false The stashed object (pstContent, output, timestamp, edits), or
 *   false if the stash was not consulted, is empty, or is stale
 */
public function checkCache( PageIdentity $page, Content $content, UserIdentity $user ) {
	$legacyUser = $this->userFactory->newFromUserIdentity( $user );
	if (
		// The context is not an HTTP POST request
		!$legacyUser->getRequest()->wasPosted() ||
		// The context is a CLI script or a job runner HTTP POST request
		$this->initiator !== self::INITIATOR_USER ||
		// The editor account is a known bot
		$legacyUser->isBot()
	) {
		// Avoid wasted queries and statsd pollution
		return false;
	}

	$logger = $this->logger;

	$key = $this->getStashKey( $page, $this->getContentHash( $content ), $user );
	$logContext = [
		'key' => $key,
		'title' => (string)$page,
		'user' => $user->getName()
	];

	$editInfo = $this->getAndWaitForStashValue( $key );
	if ( !is_object( $editInfo ) || !$editInfo->output ) {
		$this->incrStatsByContent( 'cache_misses.no_stash', $content );
		if ( $this->recentStashEntryCount( $user ) > 0 ) {
			$logger->info( "Empty cache for key '{key}' but not for user.", $logContext );
		} else {
			$logger->debug( "Empty cache for key '{key}'.", $logContext );
		}

		return false;
	}

	$age = time() - (int)wfTimestamp( TS_UNIX, $editInfo->output->getCacheTime() );
	$logContext['age'] = $age;

	if ( $age <= self::PRESUME_FRESH_TTL_SEC ) {
		// Assume nothing changed in this time
		$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
		$logger->debug( "Timestamp-based cache hit for key '{key}'.", $logContext );
	} elseif ( !$user->isRegistered() ) {
		$lastEdit = $this->lastEditTime( $user );
		$cacheTime = $editInfo->output->getCacheTime();
		if ( $lastEdit >= $cacheTime ) {
			$this->incrStatsByContent( 'cache_misses.proven_stale', $content );
			$logger->info( "Stale cache for key '{key}' due to outside edits.", $logContext );

			return false;
		}
		// Logged-out user made no local upload/template edits in the meantime
		$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
		$logger->debug( "Edit check based cache hit for key '{key}'.", $logContext );
	} else {
		if ( $editInfo->edits !== $this->userEditTracker->getUserEditCount( $user ) ) {
			$this->incrStatsByContent( 'cache_misses.proven_stale', $content );
			// Same wording as the logged-out branch (the space after the key was missing)
			$logger->info( "Stale cache for key '{key}' due to outside edits.", $logContext );

			return false;
		}
		// Logged-in user made no local upload/template edits in the meantime
		$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
		$logger->debug( "Edit count based cache hit for key '{key}'.", $logContext );
	}

	if ( $editInfo->output->getOutputFlag( ParserOutputFlags::VARY_REVISION ) ) {
		// This can be used for the initial parse, e.g. for filters or doUserEditContent(),
		// but a second parse will be triggered in doEditUpdates() no matter what
		$logger->info(
			"Cache for key '{key}' has vary-revision; post-insertion parse inevitable.",
			$logContext
		);
	} else {
		static $flagsMaybeReparse = [
			// Similar to the above if we didn't guess the ID correctly
			ParserOutputFlags::VARY_REVISION_ID,
			// Similar to the above if we didn't guess the timestamp correctly
			ParserOutputFlags::VARY_REVISION_TIMESTAMP,
			// Similar to the above if we didn't guess the content correctly
			ParserOutputFlags::VARY_REVISION_SHA1,
			// Similar to the above if we didn't guess page ID correctly
			ParserOutputFlags::VARY_PAGE_ID,
		];
		foreach ( $flagsMaybeReparse as $flag ) {
			if ( $editInfo->output->getOutputFlag( $flag ) ) {
				$logger->debug(
					"Cache for key '{key}' has $flag; post-insertion parse possible.",
					$logContext
				);
			}
		}
	}

	return $editInfo;
}
349
/**
 * Bump a stash counter twice: once overall and once under the content model.
 *
 * @param string $subkey Counter name appended to the stats key prefix
 * @param Content $content Content whose model name becomes part of the second key
 */
private function incrStatsByContent( $subkey, Content $content ) {
	// Overall counter, kept for backwards compatibility
	$overallKey = "editstash.{$subkey}";
	$this->stats->increment( $overallKey );

	// Same counter, split by content model
	$perModelKey = "editstash_by_model.{$content->getModel()}.{$subkey}";
	$this->stats->increment( $perModelKey );
}
358
/**
 * Fetch the stashed value for a key, blocking on a concurrent stash request if needed.
 *
 * If nothing is stashed yet, a primary DB lock on the key is requested (third lock()
 * argument: 30) and the cache is read once more after the lock has been obtained.
 * The time spent in this second phase is reported as 'editstash.lock_wait_time'.
 *
 * @param string $key Stash cache key
 * @return stdClass|false Whatever getStashValue() returned last
 */
private function getAndWaitForStashValue( $key ) {
	$stashed = $this->getStashValue( $key );
	if ( $stashed ) {
		// Fast path: the value is already there
		return $stashed;
	}

	$waitStart = microtime( true );

	// We ignore user aborts and keep parsing. Block on any prior parsing
	// so as to use its results and make use of the time spent parsing.
	$primary = $this->lb->getConnection( DB_PRIMARY );
	if ( $primary->lock( $key, __METHOD__, 30 ) ) {
		$stashed = $this->getStashValue( $key );
		$primary->unlock( $key, __METHOD__ );
	}

	$waitedSec = microtime( true ) - $waitStart;
	$this->stats->timing( 'editstash.lock_wait_time', 1000 * max( 0, $waitedSec ) );

	return $stashed;
}
382
/**
 * Read back input text previously stored under the given hash.
 *
 * @param string $textHash Hash the text was stashed under
 * @return mixed Result of the cache lookup for the text key
 */
public function fetchInputText( $textHash ) {
	return $this->cache->get(
		$this->cache->makeKey( 'stashedit', 'text', $textHash )
	);
}
392
/**
 * Store input text in the cache under the given hash for up to MAX_CACHE_TTL seconds.
 *
 * @param string $text Text to stash
 * @param string $textHash Hash identifying the text
 * @return mixed Result of the cache write
 */
public function stashInputText( $text, $textHash ) {
	$cacheKey = $this->cache->makeKey( 'stashedit', 'text', $textHash );
	$stored = $this->cache->set(
		$cacheKey,
		$text,
		self::MAX_CACHE_TTL,
		BagOStuff::WRITE_ALLOW_SEGMENTS
	);

	return $stored;
}
408
/**
 * Find the newest recentchanges timestamp recorded for the given user name.
 *
 * @param UserIdentity $user
 * @return string|null Timestamp in TS_MW format, or null if the query yields null
 */
private function lastEditTime( UserIdentity $user ) {
	$replica = $this->lb->getConnectionRef( DB_REPLICA );

	$query = $replica->newSelectQueryBuilder()
		->select( 'MAX(rc_timestamp)' )
		->from( 'recentchanges' )
		->join( 'actor', null, 'actor_id=rc_actor' )
		->where( [ 'actor_name' => $user->getName() ] )
		->caller( __METHOD__ );
	$latest = $query->fetchField();

	return wfTimestampOrNull( TS_MW, $latest );
}
426
/**
 * Compute a SHA-1 over the content's model, default format and serialized form.
 *
 * @param Content $content
 * @return string Hex SHA-1 of the three parts joined by newlines
 */
private function getContentHash( Content $content ) {
	$parts = [];
	$parts[] = $content->getModel();
	$parts[] = $content->getDefaultFormat();
	$parts[] = $content->serialize( $content->getDefaultFormat() );

	$joined = implode( "\n", $parts );

	return sha1( $joined );
}
440
/**
 * Build the cache key under which the stashed edit is stored.
 *
 * The key combines a hash of the page (namespace and DB key), the content hash
 * and a hash of the user (ID and name).
 *
 * @param PageIdentity $page
 * @param string $contentHash Hash of the edit content, see getContentHash()
 * @param UserIdentity $user
 * @return string
 */
private function getStashKey( PageIdentity $page, $contentHash, UserIdentity $user ) {
	$pageHash = md5( $page->getNamespace() . "\n" . $page->getDBkey() );
	// Account for user name related variables like signatures
	$userHash = md5( $user->getId() . "\n" . $user->getName() );

	// $contentHash accounts for the edit model/text
	return $this->cache->makeKey( 'stashedit-info-v2', $pageHash, $contentHash, $userHash );
}
463
/**
 * Read a stash entry from the cache and unserialize it.
 *
 * @param string $key Stash cache key
 * @return stdClass|false Result of unserializeStashInfo() for the cached value
 */
private function getStashValue( $key ) {
	return $this->unserializeStashInfo( $this->cache->get( $key ) );
}
473
/**
 * Serialize the prepared edit data and write it to the cache with a bounded TTL.
 *
 * @param string $key Stash cache key
 * @param Content $pstContent Content stored as 'pstContent'
 * @param ParserOutput $parserOutput Parser output stored as 'output'
 * @param string $timestamp Stored as 'timestamp'
 * @param UserIdentity $user User whose edit count is recorded (null if unregistered)
 * @return string|bool True on success, or one of 'uncacheable' / 'store_error'
 */
private function storeStashValue(
	$key,
	Content $pstContent,
	ParserOutput $parserOutput,
	$timestamp,
	UserIdentity $user
) {
	// If an item is renewed, mind the cache TTL determined by config and parser functions.
	// Put an upper limit on the TTL to avoid extreme template/file staleness.
	$outputAge = time() - (int)wfTimestamp( TS_UNIX, $parserOutput->getCacheTime() );
	$remaining = $parserOutput->getCacheExpiry() - $outputAge;
	$ttl = min( $remaining, self::MAX_CACHE_TTL );

	// Avoid extremely stale user signature timestamps (T84843)
	$hasSignature = $parserOutput->getOutputFlag( ParserOutputFlags::USER_SIGNATURE );
	if ( $hasSignature ) {
		$ttl = min( $ttl, self::MAX_SIGNATURE_TTL );
	}

	if ( $ttl <= 0 ) {
		// low TTL due to a tag, magic word, or signature?
		return 'uncacheable';
	}

	// Store what is actually needed and split the output into another key (T204742)
	if ( $user->isRegistered() ) {
		$editCount = $this->userEditTracker->getUserEditCount( $user );
	} else {
		$editCount = null;
	}
	$stashInfo = (object)[
		'pstContent' => $pstContent,
		'output' => $parserOutput,
		'timestamp' => $timestamp,
		'edits' => $editCount,
	];

	$serial = $this->serializeStashInfo( $stashInfo );
	if ( $serial === false ) {
		return 'store_error';
	}

	if ( !$this->cache->set( $key, $serial, $ttl, BagOStuff::WRITE_ALLOW_SEGMENTS ) ) {
		return 'store_error';
	}

	// These blobs can waste slots in low cardinality memcached slabs
	$this->pruneExcessStashedEntries( $user, $key );

	return true;
}
528
/**
 * Record a newly stashed key in the user's recent key list and drop the oldest
 * stash entry once the list has reached MAX_CACHE_RECENT keys.
 *
 * @param UserIdentity $user User whose recent key list is updated
 * @param string $newKey Cache key that was just written
 */
private function pruneExcessStashedEntries( UserIdentity $user, $newKey ) {
	$key = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );

	$keyList = $this->cache->get( $key );
	if ( !is_array( $keyList ) ) {
		// Nothing tracked yet (or an unexpected value): start with an empty list
		$keyList = [];
	}

	// A key that is stashed again must first leave its old position. Otherwise it
	// would be listed twice and, if it happened to be the oldest entry, the value
	// that was just written would be deleted right away by the pruning below.
	$keyList = array_values( array_diff( $keyList, [ $newKey ] ) );

	if ( count( $keyList ) >= self::MAX_CACHE_RECENT ) {
		$oldestKey = array_shift( $keyList );
		$this->cache->delete( $oldestKey, BagOStuff::WRITE_PRUNE_SEGMENTS );
	}

	$keyList[] = $newKey;
	$this->cache->set( $key, $keyList, 2 * self::MAX_CACHE_TTL );
}
545
/**
 * Count the keys in the user's recent stash key list.
 *
 * @param UserIdentity $user
 * @return int Number of listed keys; 0 when the cached list is missing or empty
 */
private function recentStashEntryCount( UserIdentity $user ) {
	$listKey = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );
	$recentKeys = $this->cache->get( $listKey );

	return count( $recentKeys ?: [] );
}
555
/**
 * Turn a stash info object into its string form using PHP serialization.
 *
 * @param stdClass $stashInfo
 * @return string
 */
private function serializeStashInfo( stdClass $stashInfo ) {
	// @todo: use JSON with ParserOutput and Content
	$serial = serialize( $stashInfo );

	return $serial;
}
560
/**
 * Restore a stash info object from its serialized string form.
 *
 * @param mixed $serial Value read from the cache
 * @return stdClass|false The object if $serial is a string that unserializes to an
 *   object whose 'output' is a ParserOutput, false otherwise
 */
private function unserializeStashInfo( $serial ) {
	if ( !is_string( $serial ) ) {
		return false;
	}

	// @todo: use JSON with ParserOutput and Content
	$info = unserialize( $serial );
	$isValid = is_object( $info ) && $info->output instanceof ParserOutput;

	return $isValid ? $info : false;
}
572}
const EDIT_INTERNAL
Definition Defines.php:133
wfTimestampOrNull( $outputtype=TS_UNIX, $ts=null)
Return a formatted timestamp, or null if input is null.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Class representing a cache/ephemeral data store.
Definition BagOStuff.php:85
getCacheExpiry()
Returns the number of seconds after which this object should expire.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookContainer for dispatch.
Service for creating WikiPage objects.
Value object representing a content slot associated with a page revision.
Manage the pre-emptive page parsing for edits to wiki pages.
checkCache(PageIdentity $page, Content $content, UserIdentity $user)
Check that a prepared edit is in cache and still up-to-date.
parseAndCache( $pageUpdater, Content $content, UserIdentity $user, string $summary)
__construct(BagOStuff $cache, ILoadBalancer $lb, LoggerInterface $logger, StatsdDataFactoryInterface $stats, UserEditTracker $userEditTracker, UserFactory $userFactory, WikiPageFactory $wikiPageFactory, HookContainer $hookContainer, $initiator)
Track info about user edit counts and timings.
Creates User objects.
getOutputFlag(string $name)
Provides a uniform interface to various boolean flags stored in the ParserOutput.
Base representation for an editable wiki page.
Definition WikiPage.php:75
Base interface for representing page content.
Definition Content.php:37
Interface for objects (potentially) representing an editable wiki page.
This is a hook handler interface, see docs/Hooks.md.
Interface for objects representing user identity.
This class is a delegate to ILBFactory for a given database cluster.
const DB_REPLICA
Definition defines.php:26
const DB_PRIMARY
Definition defines.php:28
$content
Definition router.php:76
return true
Definition router.php:92