MediaWiki master
PageEditStash.php
Go to the documentation of this file.
1<?php
21namespace MediaWiki\Storage;
22
23use BagOStuff;
24use Content;
25use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
37use Psr\Log\LoggerInterface;
38use stdClass;
40use Wikimedia\ScopedCallback;
41use WikiPage;
42
/** @var BagOStuff Cache store used for the stashed edit entries, input text and per-user key lists */
56 private $cache;
/** @var IConnectionProvider Source of the primary (locking) and replica (query) database handles */
58 private $dbProvider;
/** @var LoggerInterface */
60 private $logger;
/** @var StatsdDataFactoryInterface Receives the "editstash.*" counters and timings */
62 private $stats;
/** @var HookRunner Built from the HookContainer passed to the constructor */
64 private $hookRunner;
/** @var UserEditTracker Supplies the user edit count stored with, and compared against, stash entries */
66 private $userEditTracker;
/** @var UserFactory */
68 private $userFactory;
/** @var WikiPageFactory */
70 private $wikiPageFactory;
/** @var int Compared against self::INITIATOR_USER in checkCache(); presumably one of the INITIATOR_* constants */
72 private $initiator;
73
// Status codes returned by parseAndCache():
// ERROR_NONE        - the output was stored, or a sufficiently fresh entry already existed
// ERROR_PARSE       - no parser output was available to stash
// ERROR_CACHE       - storing the entry in the cache failed
// ERROR_UNCACHEABLE - same string as the 'uncacheable' code produced by storeStashValue()
// ERROR_BUSY        - the per-key lock could not be acquired without waiting
74 public const ERROR_NONE = 'stashed';
75 public const ERROR_PARSE = 'error_parse';
76 public const ERROR_CACHE = 'error_cache';
77 public const ERROR_UNCACHEABLE = 'uncacheable';
78 public const ERROR_BUSY = 'busy';
79
// Age in seconds below which a stashed entry is treated as fresh without further checks
80 public const PRESUME_FRESH_TTL_SEC = 30;
// Upper bound in seconds on the TTL of stashed entries; also the TTL of stashed input text
81 public const MAX_CACHE_TTL = 300; // 5 minutes
// Upper bound in seconds on the TTL of entries whose output carries the USER_SIGNATURE flag
82 public const MAX_SIGNATURE_TTL = 60;
83
// Length of the per-user recent key list at which the oldest entry is deleted before adding a new one
84 private const MAX_CACHE_RECENT = 2;
85
// Values for the constructor's $initiator argument; checkCache() only proceeds for INITIATOR_USER
86 public const INITIATOR_USER = 1;
87 public const INITIATOR_JOB_OR_CLI = 2;
88
/**
 * Wire up the services this stash depends on.
 *
 * @param BagOStuff $cache Store for the stashed entries
 * @param IConnectionProvider $dbProvider Source of database connections
 * @param LoggerInterface $logger
 * @param StatsdDataFactoryInterface $stats
 * @param UserEditTracker $userEditTracker
 * @param UserFactory $userFactory
 * @param WikiPageFactory $wikiPageFactory
 * @param HookContainer $hookContainer Wrapped in a HookRunner for hook dispatch
 * @param int $initiator Presumably one of the INITIATOR_* class constants
 */
public function __construct(
	BagOStuff $cache,
	IConnectionProvider $dbProvider,
	LoggerInterface $logger,
	StatsdDataFactoryInterface $stats,
	UserEditTracker $userEditTracker,
	UserFactory $userFactory,
	WikiPageFactory $wikiPageFactory,
	HookContainer $hookContainer,
	$initiator
) {
	// What kind of request created this instance
	$this->initiator = $initiator;

	// Storage back ends
	$this->cache = $cache;
	$this->dbProvider = $dbProvider;

	// Diagnostics
	$this->logger = $logger;
	$this->stats = $stats;

	// User and page helpers
	$this->userEditTracker = $userEditTracker;
	$this->userFactory = $userFactory;
	$this->wikiPageFactory = $wikiPageFactory;

	// Hooks are only ever invoked through the typed runner
	$this->hookRunner = new HookRunner( $hookContainer );
}
121
/**
 * Pre-emptively parse the given content and stash the result so that a later
 * save of the same page/content/user bundle can reuse it.
 *
 * @param WikiPage|object $pageUpdater Updater for the page being edited (presumably a
 *   PageUpdater; only getPage(), setContent() and prepareUpdate() are relied on here).
 *   Passing a WikiPage is deprecated since 1.42; it is converted via newPageUpdater().
 * @param Content $content Edit content to parse
 * @param UserIdentity $user User performing the edit
 * @param string $summary Edit summary, forwarded to the ParserOutputStashForEdit hook
 * @return string One of the ERROR_* class constants (ERROR_NONE on success)
 */
public function parseAndCache( $pageUpdater, Content $content, UserIdentity $user, string $summary ) {
	$logger = $this->logger;

	if ( $pageUpdater instanceof WikiPage ) {
		wfDeprecated( __METHOD__ . ' with WikiPage instance', '1.42' );
		$pageUpdater = $pageUpdater->newPageUpdater( $user );
	}

	$page = $pageUpdater->getPage();
	$key = $this->getStashKey( $page, $this->getContentHash( $content ), $user );
	// Captured here because __METHOD__ inside the closure below would name the closure
	$fname = __METHOD__;

	// Use the primary DB to allow for fast blocking locks on the "save path" where this
	// value might actually be used to complete a page edit. If the edit submission request
	// happens before this edit stash requests finishes, then the submission will block until
	// the stash request finishes parsing. For the lock acquisition below, there is not much
	// need to duplicate parsing of the same content/user/summary bundle, so try to avoid
	// blocking at all here.
	$dbw = $this->dbProvider->getPrimaryDatabase();
	if ( !$dbw->lock( $key, $fname, 0 ) ) {
		// De-duplicate requests on the same key
		return self::ERROR_BUSY;
	}
	// Never read again on purpose: the lock is released when this object goes out of scope,
	// which covers every return path below.
	$unlocker = new ScopedCallback( static function () use ( $dbw, $key, $fname ) {
		$dbw->unlock( $key, $fname );
	} );

	$cutoffTime = time() - self::PRESUME_FRESH_TTL_SEC;

	// Reuse any freshly built matching edit stash cache
	$editInfo = $this->getStashValue( $key );
	if ( $editInfo && (int)wfTimestamp( TS_UNIX, $editInfo->timestamp ) >= $cutoffTime ) {
		$alreadyCached = true;
	} else {
		$pageUpdater->setContent( SlotRecord::MAIN, $content );

		$update = $pageUpdater->prepareUpdate( EDIT_INTERNAL ); // applies pre-save transform
		$output = $update->getCanonicalParserOutput(); // causes content to be parsed
		$output->setCacheTime( $update->getRevision()->getTimestamp() );

		// emulate a cache value that kind of looks like a PreparedEdit, for use below
		$editInfo = (object)[
			'pstContent' => $update->getRawContent( SlotRecord::MAIN ),
			'output' => $output,
			'timestamp' => $output->getCacheTime()
		];

		$alreadyCached = false;
	}

	$logContext = [ 'cachekey' => $key, 'title' => (string)$page ];

	if ( $editInfo->output ) {
		// Let extensions add ParserOutput metadata or warm other caches
		$legacyUser = $this->userFactory->newFromUserIdentity( $user );
		$legacyPage = $this->wikiPageFactory->newFromTitle( $page );
		$this->hookRunner->onParserOutputStashForEdit(
			$legacyPage, $content, $editInfo->output, $summary, $legacyUser );

		if ( $alreadyCached ) {
			$logger->debug( "Parser output for key '{cachekey}' already cached.", $logContext );

			return self::ERROR_NONE;
		}

		$code = $this->storeStashValue(
			$key,
			$editInfo->pstContent,
			$editInfo->output,
			$editInfo->timestamp,
			$user
		);

		if ( $code === true ) {
			$logger->debug( "Cached parser output for key '{cachekey}'.", $logContext );

			return self::ERROR_NONE;
		} elseif ( $code === 'uncacheable' ) {
			$logger->info(
				"Uncacheable parser output for key '{cachekey}' [{code}].",
				$logContext + [ 'code' => $code ]
			);

			// Report the dedicated status rather than falling through to ERROR_PARSE:
			// the content did parse, it just cannot be stashed.
			return self::ERROR_UNCACHEABLE;
		} else {
			$logger->error(
				"Failed to cache parser output for key '{cachekey}'.",
				$logContext + [ 'code' => $code ]
			);

			return self::ERROR_CACHE;
		}
	}

	// No parser output was available to stash
	return self::ERROR_PARSE;
}
226
/**
 * Check that a prepared edit is in cache and still up-to-date.
 *
 * Nothing is looked up (and false is returned) unless the request was POSTed, this
 * instance was created with INITIATOR_USER, and the user is not a bot.
 *
 * An entry is accepted when it is at most PRESUME_FRESH_TTL_SEC seconds old. Older
 * entries are accepted only if the user shows no activity since stashing: for
 * unregistered users the latest recent change must predate the cache time, and for
 * registered users the edit count must equal the one recorded in the entry.
 *
 * @param PageIdentity $page Page being edited
 * @param Content $content Edit content
 * @param UserIdentity $user User performing the edit
 * @return stdClass|false The stashed entry (pstContent, output, timestamp, edits) or
 *   false if there is no usable entry
 */
public function checkCache( PageIdentity $page, Content $content, UserIdentity $user ) {
	$legacyUser = $this->userFactory->newFromUserIdentity( $user );
	if (
		// The context is not an HTTP POST request
		!$legacyUser->getRequest()->wasPosted() ||
		// The context is a CLI script or a job runner HTTP POST request
		$this->initiator !== self::INITIATOR_USER ||
		// The editor account is a known bot
		$legacyUser->isBot()
	) {
		// Avoid wasted queries and statsd pollution
		return false;
	}

	$logger = $this->logger;

	$key = $this->getStashKey( $page, $this->getContentHash( $content ), $user );
	$logContext = [
		'key' => $key,
		'title' => (string)$page,
		'user' => $user->getName()
	];

	$editInfo = $this->getAndWaitForStashValue( $key );
	if ( !is_object( $editInfo ) || !$editInfo->output ) {
		$this->incrStatsByContent( 'cache_misses.no_stash', $content );
		if ( $this->recentStashEntryCount( $user ) > 0 ) {
			$logger->info( "Empty cache for key '{key}' but not for user.", $logContext );
		} else {
			$logger->debug( "Empty cache for key '{key}'.", $logContext );
		}

		return false;
	}

	$output = $editInfo->output;
	$cacheTime = $output->getCacheTime();
	$age = time() - (int)wfTimestamp( TS_UNIX, $cacheTime );
	$logContext['age'] = $age;

	$isCacheUsable = true;
	if ( $age <= self::PRESUME_FRESH_TTL_SEC ) {
		// Assume nothing changed in this time
		$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
		$logger->debug( "Timestamp-based cache hit for key '{key}'.", $logContext );
	} elseif ( !$user->isRegistered() ) {
		$lastEdit = $this->lastEditTime( $user );
		if ( $lastEdit < $cacheTime ) {
			// Logged-out user made no local upload/template edits in the meantime
			$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
			$logger->debug( "Edit check based cache hit for key '{key}'.", $logContext );
		} else {
			$isCacheUsable = false;
			$this->incrStatsByContent( 'cache_misses.proven_stale', $content );
			$logger->info( "Stale cache for key '{key}' due to outside edits.", $logContext );
		}
	} else {
		if ( $editInfo->edits === $this->userEditTracker->getUserEditCount( $user ) ) {
			// Logged-in user made no local upload/template edits in the meantime
			$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
			$logger->debug( "Edit count based cache hit for key '{key}'.", $logContext );
		} else {
			$isCacheUsable = false;
			$this->incrStatsByContent( 'cache_misses.proven_stale', $content );
			$logger->info( "Stale cache for key '{key}' due to outside edits.", $logContext );
		}
	}

	if ( !$isCacheUsable ) {
		return false;
	}

	if ( $output->getOutputFlag( ParserOutputFlags::VARY_REVISION ) ) {
		// This can be used for the initial parse, e.g. for filters or doUserEditContent(),
		// but a second parse will be triggered in doEditUpdates() no matter what
		$logger->info(
			"Cache for key '{key}' has vary-revision; post-insertion parse inevitable.",
			$logContext
		);
	} else {
		static $flagsMaybeReparse = [
			// Similar to the above if we didn't guess the ID correctly
			ParserOutputFlags::VARY_REVISION_ID,
			// Similar to the above if we didn't guess the timestamp correctly
			ParserOutputFlags::VARY_REVISION_TIMESTAMP,
			// Similar to the above if we didn't guess the content correctly
			ParserOutputFlags::VARY_REVISION_SHA1,
			// Similar to the above if we didn't guess page ID correctly
			ParserOutputFlags::VARY_PAGE_ID,
		];
		// Purely informational: these flags never cause the entry to be rejected
		foreach ( $flagsMaybeReparse as $flag ) {
			if ( $output->getOutputFlag( $flag ) ) {
				$logger->debug(
					"Cache for key '{key}' has $flag; post-insertion parse possible.",
					$logContext
				);
			}
		}
	}

	return $editInfo;
}
348
/**
 * Bump a stash counter twice: once globally and once under the content model.
 *
 * @param string $subkey Counter name below the "editstash" prefix
 * @param Content $content Content whose model names the per-model counter
 */
private function incrStatsByContent( $subkey, Content $content ) {
	// Aggregate counter, kept for backwards compatibility
	$overallMetric = 'editstash.' . $subkey;
	$this->stats->increment( $overallMetric );

	// Same counter, split by content model
	$perModelMetric = 'editstash_by_model.' . $content->getModel() . '.' . $subkey;
	$this->stats->increment( $perModelMetric );
}
357
/**
 * Fetch the stash entry for a key, waiting for an in-progress stash request if needed.
 *
 * When the first lookup misses, this waits up to 30 seconds for the named lock on
 * the same key that parseAndCache() holds while parsing, then looks again. The
 * elapsed wait is reported as the "editstash.lock_wait_time" timing.
 *
 * @param string $key Stash key as produced by getStashKey()
 * @return stdClass|false The stash entry, or false if none is available
 */
private function getAndWaitForStashValue( $key ) {
	$editInfo = $this->getStashValue( $key );
	if ( $editInfo ) {
		// Already stashed; no need to touch the primary DB
		return $editInfo;
	}

	$start = microtime( true );
	// We ignore user aborts and keep parsing. Block on any prior parsing
	// so as to use its results and make use of the time spent parsing.
	$dbw = $this->dbProvider->getPrimaryDatabase();
	if ( $dbw->lock( $key, __METHOD__, 30 ) ) {
		try {
			$editInfo = $this->getStashValue( $key );
		} finally {
			// Release the lock even if the lookup throws, so later requests
			// for this key on the same connection are not left blocked
			$dbw->unlock( $key, __METHOD__ );
		}
	}

	$timeMs = 1000 * max( 0, microtime( true ) - $start );
	$this->stats->timing( 'editstash.lock_wait_time', $timeMs );

	return $editInfo;
}
381
/**
 * Look up input text previously saved with stashInputText().
 *
 * @param string $textHash Hash under which the text was stored
 * @return mixed Whatever the cache returns for that key
 */
public function fetchInputText( $textHash ) {
	return $this->cache->get(
		$this->cache->makeKey( 'stashedit', 'text', $textHash )
	);
}
391
/**
 * Save raw input text in the cache for MAX_CACHE_TTL seconds.
 *
 * @param string $text Text to store
 * @param string $textHash Hash that identifies the text; used to build the cache key
 * @return mixed Result of the cache write
 */
public function stashInputText( $text, $textHash ) {
	$cacheKey = $this->cache->makeKey( 'stashedit', 'text', $textHash );
	$stored = $this->cache->set(
		$cacheKey,
		$text,
		self::MAX_CACHE_TTL,
		BagOStuff::WRITE_ALLOW_SEGMENTS
	);

	return $stored;
}
407
/**
 * Find the newest recentchanges timestamp recorded under the user's name.
 *
 * @param UserIdentity $user
 * @return string|null Timestamp converted to TS_MW by wfTimestampOrNull(), which
 *   yields null when the query result is null
 */
private function lastEditTime( UserIdentity $user ) {
	// A replica is queried here; no primary DB access is involved
	$dbr = $this->dbProvider->getReplicaDatabase();

	$query = $dbr->newSelectQueryBuilder()
		->select( 'MAX(rc_timestamp)' )
		->from( 'recentchanges' )
		->join( 'actor', null, 'actor_id=rc_actor' )
		->where( [ 'actor_name' => $user->getName() ] )
		->caller( __METHOD__ );
	$latest = $query->fetchField();

	return wfTimestampOrNull( TS_MW, $latest );
}
423
/**
 * Build a SHA-1 hash covering the content's model, default format and serialized
 * text in that format.
 *
 * @param Content $content
 * @return string Hexadecimal SHA-1 digest
 */
private function getContentHash( Content $content ) {
	$parts = [];
	$parts[] = $content->getModel();
	$parts[] = $content->getDefaultFormat();
	$parts[] = $content->serialize( $content->getDefaultFormat() );

	// Newline-separated so the three components stay distinguishable in the digest input
	$digestInput = implode( "\n", $parts );

	return sha1( $digestInput );
}
437
/**
 * Build the cache key under which a stashed edit is stored.
 *
 * The key combines the page (namespace and DB key), the content hash, and the
 * user (ID and name).
 *
 * @param PageIdentity $page
 * @param string $contentHash Hash as returned by getContentHash()
 * @param UserIdentity $user
 * @return string Cache key
 */
private function getStashKey( PageIdentity $page, $contentHash, UserIdentity $user ) {
	$pageHash = md5( "{$page->getNamespace()}\n{$page->getDBkey()}" );
	// Account for user name related variables like signatures
	$userHash = md5( "{$user->getId()}\n{$user->getName()}" );

	// $contentHash accounts for the edit model/text
	return $this->cache->makeKey( 'stashedit-info-v2', $pageHash, $contentHash, $userHash );
}
460
/**
 * Read and decode the stash entry stored under a key.
 *
 * @param string $key Stash key
 * @return stdClass|false Decoded entry, or false if the key holds nothing usable
 */
private function getStashValue( $key ) {
	// Validation of the raw cache value is delegated to unserializeStashInfo()
	return $this->unserializeStashInfo( $this->cache->get( $key ) );
}
470
/**
 * Write a stash entry to the cache with a bounded TTL.
 *
 * @param string $key Stash key
 * @param Content $pstContent Content stored as the entry's 'pstContent'
 * @param ParserOutput $parserOutput Parser output stored as the entry's 'output'
 * @param string $timestamp Timestamp stored as the entry's 'timestamp'
 * @param UserIdentity $user User whose current edit count is recorded in the entry
 * @return string|true True on success, otherwise the code 'uncacheable' or 'store_error'
 */
private function storeStashValue(
	$key,
	Content $pstContent,
	ParserOutput $parserOutput,
	$timestamp,
	UserIdentity $user
) {
	// If an item is renewed, mind the cache TTL determined by config and parser functions.
	// Put an upper limit on the TTL to avoid extreme template/file staleness.
	$outputAge = time() - (int)wfTimestamp( TS_UNIX, $parserOutput->getCacheTime() );
	$ttl = min( $parserOutput->getCacheExpiry() - $outputAge, self::MAX_CACHE_TTL );
	if ( $parserOutput->getOutputFlag( ParserOutputFlags::USER_SIGNATURE ) ) {
		// Avoid extremely stale user signature timestamps (T84843)
		$ttl = min( $ttl, self::MAX_SIGNATURE_TTL );
	}

	if ( $ttl <= 0 ) {
		// low TTL due to a tag, magic word, or signature?
		return 'uncacheable';
	}

	// Bundle the values checkCache() and its callers read back later
	$serial = $this->serializeStashInfo( (object)[
		'pstContent' => $pstContent,
		'output' => $parserOutput,
		'timestamp' => $timestamp,
		'edits' => $this->userEditTracker->getUserEditCount( $user ),
	] );
	if ( $serial === false ) {
		return 'store_error';
	}

	if ( !$this->cache->set( $key, $serial, $ttl, BagOStuff::WRITE_ALLOW_SEGMENTS ) ) {
		return 'store_error';
	}

	// These blobs can waste slots in low cardinality memcached slabs
	$this->pruneExcessStashedEntries( $user, $key );

	return true;
}
523
/**
 * Record a newly stashed key for the user and evict the oldest recorded key
 * once the list already holds MAX_CACHE_RECENT entries.
 *
 * @param UserIdentity $user
 * @param string $newKey Stash key that was just written
 */
private function pruneExcessStashedEntries( UserIdentity $user, $newKey ) {
	$listKey = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );

	$recentKeys = $this->cache->get( $listKey );
	if ( !$recentKeys ) {
		// Nothing tracked for this user yet
		$recentKeys = [];
	}

	if ( count( $recentKeys ) >= self::MAX_CACHE_RECENT ) {
		// Entries are appended below, so the first element is the oldest
		$this->cache->delete( array_shift( $recentKeys ), BagOStuff::WRITE_PRUNE_SEGMENTS );
	}

	$recentKeys[] = $newKey;
	$this->cache->set( $listKey, $recentKeys, 2 * self::MAX_CACHE_TTL );
}
540
/**
 * Count the stash keys currently tracked for a user.
 *
 * @param UserIdentity $user
 * @return int Number of tracked keys; 0 when the cache holds no list for the user
 */
private function recentStashEntryCount( UserIdentity $user ) {
	$listKey = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );

	$recentKeys = $this->cache->get( $listKey );
	if ( !$recentKeys ) {
		$recentKeys = [];
	}

	return count( $recentKeys );
}
550
/**
 * Turn a stash entry into the string form written to the cache.
 *
 * @param stdClass $stashInfo Entry to encode
 * @return string Output of PHP's serialize()
 */
private function serializeStashInfo( stdClass $stashInfo ) {
	// @todo: use JSON with ParserOutput and Content
	$encoded = serialize( $stashInfo );

	return $encoded;
}
555
/**
 * Decode a cached stash entry and check that it has the expected shape.
 *
 * @param mixed $serial Raw cache value; anything but a string is rejected
 * @return stdClass|false The decoded entry if it is an object whose 'output' is a
 *   ParserOutput, false otherwise
 */
private function unserializeStashInfo( $serial ) {
	if ( !is_string( $serial ) ) {
		return false;
	}

	// @todo: use JSON with ParserOutput and Content
	// NOTE(review): unserialize() can instantiate arbitrary classes; within this class
	// the input only comes from the cache entries read by getStashValue().
	$stashInfo = unserialize( $serial );
	$isUsable = is_object( $stashInfo ) && $stashInfo->output instanceof ParserOutput;

	return $isUsable ? $stashInfo : false;
}
567}
const EDIT_INTERNAL
Definition Defines.php:133
wfTimestampOrNull( $outputtype=TS_UNIX, $ts=null)
Return a formatted timestamp, or null if input is null.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
Class representing a cache/ephemeral data store.
Definition BagOStuff.php:85
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Service for creating WikiPage objects.
ParserOutput is a rendering of a Content object or a message.
Value object representing a content slot associated with a page revision.
Manage the pre-emptive page parsing for edits to wiki pages.
checkCache(PageIdentity $page, Content $content, UserIdentity $user)
Check that a prepared edit is in cache and still up-to-date.
parseAndCache( $pageUpdater, Content $content, UserIdentity $user, string $summary)
__construct(BagOStuff $cache, IConnectionProvider $dbProvider, LoggerInterface $logger, StatsdDataFactoryInterface $stats, UserEditTracker $userEditTracker, UserFactory $userFactory, WikiPageFactory $wikiPageFactory, HookContainer $hookContainer, $initiator)
Track info about user edit counts and timings.
Creates User objects.
Base representation for an editable wiki page.
Definition WikiPage.php:79
Base interface for representing page content.
Definition Content.php:37
getModel()
Returns the ID of the content model used by this Content object.
serialize( $format=null)
Convenience method for serializing this Content object.
getDefaultFormat()
Convenience method that returns the default serialization format for the content model that this Cont...
Interface for objects (potentially) representing an editable wiki page.
This is a hook handler interface, see docs/Hooks.md.
Interface for objects representing user identity.
isRegistered()
This must be equivalent to getId() != 0 and is provided for code readability.
Provide primary and replica IDatabase connections.