MediaWiki REL1_35
PageEditStash.php
Go to the documentation of this file.
1<?php
23namespace MediaWiki\Storage;
24
use ActorMigration;
use BagOStuff;
use Content;
use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
use MediaWiki\HookContainer\HookContainer;
use MediaWiki\HookContainer\HookRunner;
use ParserOutput;
use Psr\Log\LoggerInterface;
use stdClass;
use Title;
use User;
use Wikimedia\Rdbms\ILoadBalancer;
use Wikimedia\ScopedCallback;
use WikiPage;
40
/** @var BagOStuff Store used for stashed edit info and stashed input text */
private $cache;
/** @var ILoadBalancer Source of the database connections used for locking and queries */
private $lb;
/** @var LoggerInterface */
private $logger;
/** @var StatsdDataFactoryInterface */
private $stats;
/** @var HookRunner Built from the HookContainer passed to the constructor */
private $hookRunner;
/** @var int Compared against the INITIATOR_* constants in checkCache() */
private $initiator;

// Result codes returned by parseAndCache()
public const ERROR_NONE = 'stashed';
public const ERROR_PARSE = 'error_parse';
public const ERROR_CACHE = 'error_cache';
public const ERROR_UNCACHEABLE = 'uncacheable';
public const ERROR_BUSY = 'busy';

// Stash entries at most this many seconds old are reused without further staleness checks
public const PRESUME_FRESH_TTL_SEC = 30;
public const MAX_CACHE_TTL = 300; // 5 minutes
// TTL cap applied when the parser output carries the 'user-signature' flag
public const MAX_SIGNATURE_TTL = 60;

// Number of recent stash keys tracked per user before the oldest is deleted
private const MAX_CACHE_RECENT = 2;

public const INITIATOR_USER = 1;
public const INITIATOR_JOB_OR_CLI = 2;
74
/**
 * @param BagOStuff $cache Store for stashed edit info and input text
 * @param ILoadBalancer $lb Provider of database connections
 * @param LoggerInterface $logger
 * @param StatsdDataFactoryInterface $stats
 * @param HookContainer $hookContainer Wrapped in a HookRunner for hook dispatch
 * @param int $initiator Class INITIATOR_* constant
 */
public function __construct(
	BagOStuff $cache,
	ILoadBalancer $lb,
	LoggerInterface $logger,
	StatsdDataFactoryInterface $stats,
	HookContainer $hookContainer,
	$initiator
) {
	$this->cache = $cache;
	$this->lb = $lb;
	$this->logger = $logger;
	$this->stats = $stats;
	$this->hookRunner = new HookRunner( $hookContainer );
	$this->initiator = $initiator;
}
98
/**
 * Parse the given content for a pending edit and stash the result in the cache.
 *
 * @param WikiPage $page
 * @param Content $content Content of the pending edit
 * @param User $user
 * @param string $summary Edit summary, passed on to the ParserOutputStashForEdit hook
 * @return string Class ERROR_* constant
 */
public function parseAndCache( WikiPage $page, Content $content, User $user, string $summary ) {
	$logger = $this->logger;

	$title = $page->getTitle();
	$key = $this->getStashKey( $title, $this->getContentHash( $content ), $user );
	$fname = __METHOD__;

	// Use the master DB to allow for fast blocking locks on the "save path" where this
	// value might actually be used to complete a page edit. If the edit submission request
	// happens before this edit stash request finishes, then the submission will block until
	// the stash request finishes parsing. For the lock acquisition below, there is not much
	// need to duplicate parsing of the same content/user/summary bundle, so try to avoid
	// blocking at all here.
	$dbw = $this->lb->getConnectionRef( DB_MASTER );
	if ( !$dbw->lock( $key, $fname, 0 ) ) {
		// De-duplicate requests on the same key
		return self::ERROR_BUSY;
	}
	// Intentionally unused: holding the ScopedCallback keeps the lock until this
	// method returns, at which point the callback releases it.
	$unlocker = new ScopedCallback( function () use ( $dbw, $key, $fname ) {
		$dbw->unlock( $key, $fname );
	} );

	$cutoffTime = time() - self::PRESUME_FRESH_TTL_SEC;

	// Reuse any freshly built matching edit stash cache
	$editInfo = $this->getStashValue( $key );
	if ( $editInfo && wfTimestamp( TS_UNIX, $editInfo->timestamp ) >= $cutoffTime ) {
		$alreadyCached = true;
	} else {
		$format = $content->getDefaultFormat();
		$editInfo = $page->prepareContentForEdit( $content, null, $user, $format, false );
		// Only touch the output if there is one; a missing output is reported
		// as ERROR_PARSE below instead of failing on a null dereference.
		if ( $editInfo && $editInfo->output ) {
			$editInfo->output->setCacheTime( $editInfo->timestamp );
		}
		$alreadyCached = false;
	}

	$context = [ 'cachekey' => $key, 'title' => $title->getPrefixedText() ];

	if ( $editInfo && $editInfo->output ) {
		// Let extensions add ParserOutput metadata or warm other caches
		$this->hookRunner->onParserOutputStashForEdit(
			$page, $content, $editInfo->output, $summary, $user );

		if ( $alreadyCached ) {
			$logger->debug( "Parser output for key '{cachekey}' already cached.", $context );

			return self::ERROR_NONE;
		}

		$code = $this->storeStashValue(
			$key,
			$editInfo->pstContent,
			$editInfo->output,
			$editInfo->timestamp,
			$user
		);

		if ( $code === true ) {
			$logger->debug( "Cached parser output for key '{cachekey}'.", $context );

			return self::ERROR_NONE;
		} elseif ( $code === 'uncacheable' ) {
			$logger->info(
				"Uncacheable parser output for key '{cachekey}' [{code}].",
				$context + [ 'code' => $code ]
			);

			// Report the dedicated status rather than falling through to ERROR_PARSE
			return self::ERROR_UNCACHEABLE;
		} else {
			$logger->error(
				"Failed to cache parser output for key '{cachekey}'.",
				$context + [ 'code' => $code ]
			);

			return self::ERROR_CACHE;
		}
	}

	return self::ERROR_PARSE;
}
186
/**
 * Check that a prepared edit is in cache and still up-to-date.
 *
 * Returns false without consulting the cache when the request was not posted,
 * when the initiator is not INITIATOR_USER, or when the user is a bot.
 *
 * @param Title $title
 * @param Content $content
 * @param User $user User to get parser options from
 * @return stdClass|bool The stashed edit info object, or false if unusable or absent
 */
public function checkCache( Title $title, Content $content, User $user ) {
	if (
		// The context is not an HTTP POST request
		!$user->getRequest()->wasPosted() ||
		// The context is a CLI script or a job runner HTTP POST request
		$this->initiator !== self::INITIATOR_USER ||
		// The editor account is a known bot
		$user->isBot()
	) {
		// Avoid wasted queries and statsd pollution
		return false;
	}

	$logger = $this->logger;

	$key = $this->getStashKey( $title, $this->getContentHash( $content ), $user );
	$context = [
		'key' => $key,
		'title' => $title->getPrefixedText(),
		'user' => $user->getName()
	];

	$editInfo = $this->getAndWaitForStashValue( $key );
	if ( !is_object( $editInfo ) || !$editInfo->output ) {
		$this->incrStatsByContent( 'cache_misses.no_stash', $content );
		if ( $this->recentStashEntryCount( $user ) > 0 ) {
			$logger->info( "Empty cache for key '{key}' but not for user.", $context );
		} else {
			$logger->debug( "Empty cache for key '{key}'.", $context );
		}

		return false;
	}

	$age = time() - (int)wfTimestamp( TS_UNIX, $editInfo->output->getCacheTime() );
	$context['age'] = $age;

	$isCacheUsable = true;
	if ( $age <= self::PRESUME_FRESH_TTL_SEC ) {
		// Assume nothing changed in this time
		$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
		$logger->debug( "Timestamp-based cache hit for key '{key}'.", $context );
	} elseif ( $user->isAnon() ) {
		$lastEdit = $this->lastEditTime( $user );
		$cacheTime = $editInfo->output->getCacheTime();
		if ( $lastEdit < $cacheTime ) {
			// Logged-out user made no local upload/template edits in the meantime
			$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
			$logger->debug( "Edit check based cache hit for key '{key}'.", $context );
		} else {
			$isCacheUsable = false;
			$this->incrStatsByContent( 'cache_misses.proven_stale', $content );
			$logger->info( "Stale cache for key '{key}' due to outside edits.", $context );
		}
	} else {
		if ( $editInfo->edits === $user->getEditCount() ) {
			// Logged-in user made no local upload/template edits in the meantime
			$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
			$logger->debug( "Edit count based cache hit for key '{key}'.", $context );
		} else {
			$isCacheUsable = false;
			$this->incrStatsByContent( 'cache_misses.proven_stale', $content );
			$logger->info( "Stale cache for key '{key}' due to outside edits.", $context );
		}
	}

	if ( !$isCacheUsable ) {
		return false;
	}

	if ( $editInfo->output->getFlag( 'vary-revision' ) ) {
		// This can be used for the initial parse, e.g. for filters or doEditContent(),
		// but a second parse will be triggered in doEditUpdates() no matter what
		$logger->info(
			"Cache for key '{key}' has vary-revision; post-insertion parse inevitable.",
			$context
		);
	} else {
		static $flagsMaybeReparse = [
			// Similar to the above if we didn't guess the ID correctly
			'vary-revision-id',
			// Similar to the above if we didn't guess the timestamp correctly
			'vary-revision-timestamp',
			// Similar to the above if we didn't guess the content correctly
			'vary-revision-sha1',
			// Similar to the above if we didn't guess page ID correctly
			'vary-page-id'
		];
		foreach ( $flagsMaybeReparse as $flag ) {
			if ( $editInfo->output->getFlag( $flag ) ) {
				$logger->debug(
					"Cache for key '{key}' has $flag; post-insertion parse possible.",
					$context
				);
			}
		}
	}

	return $editInfo;
}
307
/**
 * Bump a stats counter twice: once under the overall "editstash" prefix and
 * once under a prefix that includes the content model.
 *
 * @param string $subkey Counter name appended to both prefixes
 * @param Content $content Supplies the model name for the per-model counter
 */
private function incrStatsByContent( $subkey, Content $content ) {
	// Overall counter, kept for b/c
	$overallKey = 'editstash.' . $subkey;
	$this->stats->increment( $overallKey );

	$perModelKey = 'editstash_by_model.' . $content->getModel() . '.' . $subkey;
	$this->stats->increment( $perModelKey );
}
316
/**
 * Fetch the stash entry for a key, waiting on the key's lock once if it is absent.
 *
 * @param string $key
 * @return bool|stdClass Result of getStashValue(): the entry, or false
 */
private function getAndWaitForStashValue( $key ) {
	$stashed = $this->getStashValue( $key );
	if ( $stashed ) {
		return $stashed;
	}

	$waitStart = microtime( true );
	// We ignore user aborts and keep parsing. Block on any prior parsing
	// so as to use its results and make use of the time spent parsing.
	// Skip this logic if there is no master connection in case this method
	// is called on an HTTP GET request for some reason.
	$writerIndex = $this->lb->getWriterIndex();
	$dbw = $this->lb->getAnyOpenConnection( $writerIndex );
	if ( $dbw && $dbw->lock( $key, __METHOD__, 30 ) ) {
		// Lock obtained: look again, then release immediately
		$stashed = $this->getStashValue( $key );
		$dbw->unlock( $key, __METHOD__ );
	}

	$elapsedMs = 1000 * max( 0, microtime( true ) - $waitStart );
	$this->stats->timing( 'editstash.lock_wait_time', $elapsedMs );

	return $stashed;
}
342
/**
 * Look up stashed input text by its hash.
 *
 * @param string $textHash Hash used as the last component of the cache key
 * @return mixed Whatever the cache returns for the key
 */
public function fetchInputText( $textHash ) {
	return $this->cache->get(
		$this->cache->makeKey( 'stashedit', 'text', $textHash )
	);
}
352
/**
 * Store input text in the cache under a key derived from its hash.
 *
 * @param string $text Text to store
 * @param string $textHash Hash used as the last component of the cache key
 * @return mixed Result of the cache set() call
 */
public function stashInputText( $text, $textHash ) {
	$cacheKey = $this->cache->makeKey( 'stashedit', 'text', $textHash );
	$ttl = self::MAX_CACHE_TTL;
	$flags = BagOStuff::WRITE_ALLOW_SEGMENTS;

	return $this->cache->set( $cacheKey, $text, $ttl, $flags );
}
368
/**
 * Find the newest recentchanges timestamp attributed to the user.
 *
 * @param User $user
 * @return string|null Value of wfTimestampOrNull( TS_MW, ... ) for MAX(rc_timestamp)
 */
private function lastEditTime( User $user ) {
	$dbr = $this->lb->getConnectionRef( DB_REPLICA );
	$actorQuery = ActorMigration::newMigration()->getWhere( $dbr, 'rc_user', $user, false );

	$tables = [ 'recentchanges' ] + $actorQuery['tables'];
	$conds = [ $actorQuery['conds'] ];
	$joinConds = $actorQuery['joins'];

	$latest = $dbr->selectField( $tables, 'MAX(rc_timestamp)', $conds, __METHOD__, [], $joinConds );

	return wfTimestampOrNull( TS_MW, $latest );
}
388
/**
 * Get hash of the content, factoring in model/format.
 *
 * @param Content $content
 * @return string SHA-1 of the model, default format and serialized content, newline-joined
 */
private function getContentHash( Content $content ) {
	$parts = [];
	$parts[] = $content->getModel();
	$parts[] = $content->getDefaultFormat();
	$parts[] = $content->serialize( $content->getDefaultFormat() );

	return sha1( implode( "\n", $parts ) );
}
402
/**
 * Get the temporary prepared edit stash key for a user.
 *
 * @param Title $title
 * @param string $contentHash Result of getContentHash()
 * @param User $user
 * @return string Cache key built from the title, content hash and user identity
 */
private function getStashKey( Title $title, $contentHash, User $user ) {
	$titleHash = md5( $title->getPrefixedDBkey() );
	// Account for user name related variables like signatures
	$userHash = md5( $user->getId() . "\n" . $user->getName() );

	// $contentHash accounts for the edit model/text
	return $this->cache->makeKey( 'stashedit-info-v1', $titleHash, $contentHash, $userHash );
}
425
/**
 * Read a stash entry from the cache and validate its shape.
 *
 * @param string $key
 * @return stdClass|bool The entry if it is an object whose output is a ParserOutput, else false
 */
private function getStashValue( $key ) {
	$entry = $this->cache->get( $key );

	$isValid = is_object( $entry ) && $entry->output instanceof ParserOutput;

	return $isValid ? $entry : false;
}
438
/**
 * Build a value to store in memcached based on the PST content and parser output.
 *
 * @param string $key
 * @param Content $pstContent Pre-save transformed content
 * @param ParserOutput $parserOutput
 * @param string $timestamp Stored as the entry's timestamp
 * @param User $user Supplies the edit count recorded with the entry
 * @return string|bool True on success, otherwise 'uncacheable' or 'store_error'
 */
private function storeStashValue(
	$key,
	Content $pstContent,
	ParserOutput $parserOutput,
	$timestamp,
	User $user
) {
	// If an item is renewed, mind the cache TTL determined by config and parser functions.
	// Put an upper limit on the TTL for sanity to avoid extreme template/file staleness.
	$age = time() - (int)wfTimestamp( TS_UNIX, $parserOutput->getCacheTime() );
	$remaining = $parserOutput->getCacheExpiry() - $age;
	$ttl = min( $remaining, self::MAX_CACHE_TTL );
	// Avoid extremely stale user signature timestamps (T84843)
	if ( $parserOutput->getFlag( 'user-signature' ) ) {
		$ttl = min( $ttl, self::MAX_SIGNATURE_TTL );
	}

	if ( $ttl <= 0 ) {
		// Low TTL due to a tag, magic word, or signature?
		return 'uncacheable';
	}

	// Store what is actually needed and split the output into another key (T204742)
	$stashInfo = new stdClass();
	$stashInfo->pstContent = $pstContent;
	$stashInfo->output = $parserOutput;
	$stashInfo->timestamp = $timestamp;
	$stashInfo->edits = $user->getEditCount();

	$stored = $this->cache->set( $key, $stashInfo, $ttl, BagOStuff::WRITE_ALLOW_SEGMENTS );
	if ( !$stored ) {
		return 'store_error';
	}

	// These blobs can waste slots in low cardinality memcached slabs
	$this->pruneExcessStashedEntries( $user, $key );

	return true;
}
487
/**
 * Record a new stash key in the user's recent-key list, deleting the oldest
 * listed entry first when the list already holds MAX_CACHE_RECENT keys.
 *
 * @param User $user
 * @param string $newKey Stash key that was just written
 */
private function pruneExcessStashedEntries( User $user, $newKey ) {
	$listKey = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );

	$recentKeys = $this->cache->get( $listKey );
	if ( !$recentKeys ) {
		$recentKeys = [];
	}

	if ( count( $recentKeys ) >= self::MAX_CACHE_RECENT ) {
		$evictedKey = array_shift( $recentKeys );
		$this->cache->delete( $evictedKey, BagOStuff::WRITE_PRUNE_SEGMENTS );
	}

	$recentKeys[] = $newKey;
	$this->cache->set( $listKey, $recentKeys, 2 * self::MAX_CACHE_TTL );
}
504
/**
 * Count the stash keys currently listed as recent for the user.
 *
 * @param User $user
 * @return int Zero when the list is missing or empty
 */
private function recentStashEntryCount( User $user ) {
	$listKey = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );

	$recentKeys = $this->cache->get( $listKey );
	if ( !$recentKeys ) {
		$recentKeys = [];
	}

	return count( $recentKeys );
}
514}
wfTimestampOrNull( $outputtype=TS_UNIX, $ts=null)
Return a formatted timestamp, or null if input is null.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
This class handles the logic for the actor table migration and should always be used in lieu of directly accessing database tables.
Class representing a cache/ephemeral data store.
Definition BagOStuff.php:71
getCacheExpiry()
Returns the number of seconds after which this object should expire.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookContainer for dispatch to extensions.
Class for managing stashed edits used by the page updater classes.
incrStatsByContent( $subkey, Content $content)
StatsdDataFactoryInterface $stats
ParserOutputStashForEditHook $hookRunner
pruneExcessStashedEntries(User $user, $newKey)
getContentHash(Content $content)
Get hash of the content, factoring in model/format.
getStashKey(Title $title, $contentHash, User $user)
Get the temporary prepared edit stash key for a user.
checkCache(Title $title, Content $content, User $user)
Check that a prepared edit is in cache and still up-to-date.
storeStashValue( $key, Content $pstContent, ParserOutput $parserOutput, $timestamp, User $user)
Build a value to store in memcached based on the PST content and parser output.
parseAndCache(WikiPage $page, Content $content, User $user, string $summary)
__construct(BagOStuff $cache, ILoadBalancer $lb, LoggerInterface $logger, StatsdDataFactoryInterface $stats, HookContainer $hookContainer, $initiator)
Represents a title within MediaWiki.
Definition Title.php:42
The User object encapsulates all of the user-specific settings (user_id, name, rights,...
Definition User.php:60
getRequest()
Get the WebRequest object to use with this object.
Definition User.php:3205
getName()
Get the user name, or the IP of an anonymous user.
Definition User.php:2150
getId()
Get the user's ID.
Definition User.php:2121
getEditCount()
Get the user's edit count.
Definition User.php:3013
isBot()
Definition User.php:3095
isAnon()
Get whether the user is anonymous.
Definition User.php:3087
Class representing a MediaWiki article and history.
Definition WikiPage.php:51
prepareContentForEdit(Content $content, $revision=null, User $user=null, $serialFormat=null, $useCache=true)
Prepare content which is about to be saved.
getTitle()
Get the title object of the article.
Definition WikiPage.php:318
Base interface for content objects.
Definition Content.php:35
Database cluster connection, tracking, load balancing, and transaction manager interface.
const DB_REPLICA
Definition defines.php:25
const DB_MASTER
Definition defines.php:29
$content
Definition router.php:76
return true
Definition router.php:92