MediaWiki REL1_37
PageEditStash.php
Go to the documentation of this file.
1<?php
23namespace MediaWiki\Storage;
24
use BagOStuff;
use Content;
use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
use MediaWiki\HookContainer\HookContainer;
use MediaWiki\HookContainer\HookRunner;
use MediaWiki\Page\PageIdentity;
use MediaWiki\User\UserEditTracker;
use MediaWiki\User\UserFactory;
use MediaWiki\User\UserIdentity;
use ParserOutput;
use Psr\Log\LoggerInterface;
use stdClass;
use User;
use Wikimedia\Rdbms\ILoadBalancer;
use Wikimedia\ScopedCallback;
use WikiPage;
42
/** @var BagOStuff Cache that holds the stashed edit entries */
private $cache;
/** @var ILoadBalancer Source of the DB connections used for locking and queries */
private $lb;
/** @var LoggerInterface */
private $logger;
/** @var StatsdDataFactoryInterface */
private $stats;
/** @var ParserOutputStashForEditHook */
private $hookRunner;
/** @var UserEditTracker Declared explicitly; the constructor assigns it */
private $userEditTracker;
/** @var UserFactory */
private $userFactory;
/** @var int One of the INITIATOR_* constants */
private $initiator;
65
// Status codes reported by parseAndCache().
/** The parser output was stored, or a fresh matching entry already existed. */
66 public const ERROR_NONE = 'stashed';
/** No parser output was available to stash. */
67 public const ERROR_PARSE = 'error_parse';
/** Writing the stash entry to the cache failed. */
68 public const ERROR_CACHE = 'error_cache';
/** Status for output that cannot be cached; storeStashValue() reports 'uncacheable' when the TTL is not positive. */
69 public const ERROR_UNCACHEABLE = 'uncacheable';
/** The lock on the stash key could not be acquired without waiting. */
70 public const ERROR_BUSY = 'busy';
71
/** Entries at most this many seconds old are presumed fresh. */
72 public const PRESUME_FRESH_TTL_SEC = 30;
/** Upper bound, in seconds, on the TTL of stashed entries and stashed input text. */
73 public const MAX_CACHE_TTL = 300; // 5 minutes
/** TTL cap, in seconds, applied when the output carries the 'user-signature' flag. */
74 public const MAX_SIGNATURE_TTL = 60;
75
/** Once a user's recent-key list holds this many keys, the oldest one is deleted before a new one is added. */
76 private const MAX_CACHE_RECENT = 2;
77
/** Initiator value required by checkCache() to consult the stash. */
78 public const INITIATOR_USER = 1;
/** Initiator value for CLI scripts and job runner requests, per the comment in checkCache(). */
79 public const INITIATOR_JOB_OR_CLI = 2;
80
/**
 * @param BagOStuff $cache Cache that holds the stashed edit entries
 * @param ILoadBalancer $lb Provides the DB connections used for locks and queries
 * @param LoggerInterface $logger
 * @param StatsdDataFactoryInterface $stats
 * @param UserEditTracker $userEditTracker Used to read user edit counts
 * @param UserFactory $userFactory Used to turn a UserIdentity into a User for hooks
 * @param HookContainer $hookContainer Wrapped in a HookRunner
 * @param int $initiator One of the INITIATOR_* constants
 */
public function __construct(
	BagOStuff $cache,
	ILoadBalancer $lb,
	LoggerInterface $logger,
	StatsdDataFactoryInterface $stats,
	UserEditTracker $userEditTracker,
	UserFactory $userFactory,
	HookContainer $hookContainer,
	$initiator
) {
	$this->cache = $cache;
	$this->lb = $lb;
	$this->logger = $logger;
	$this->stats = $stats;
	$this->userEditTracker = $userEditTracker;
	$this->userFactory = $userFactory;
	$this->hookRunner = new HookRunner( $hookContainer );
	$this->initiator = $initiator;
}
110
/**
 * Parse the given content for a pending edit and stash the result in the cache.
 *
 * A non-blocking lock on the stash key de-duplicates concurrent requests for the
 * same page/content/user bundle. A matching entry newer than PRESUME_FRESH_TTL_SEC
 * is reused instead of parsing again.
 *
 * @param WikiPage $page
 * @param Content $content Edit content to prepare
 * @param UserIdentity $user
 * @param string $summary Edit summary, passed on to the ParserOutputStashForEdit hook
 * @return string One of the ERROR_* constants
 */
public function parseAndCache( WikiPage $page, Content $content, UserIdentity $user, string $summary ) {
	$logger = $this->logger;

	$key = $this->getStashKey( $page, $this->getContentHash( $content ), $user );
	$fname = __METHOD__;

	// Use the primary DB to allow for fast blocking locks on the "save path" where this
	// value might actually be used to complete a page edit. If the edit submission request
	// happens before this edit stash request finishes, then the submission will block until
	// the stash request finishes parsing. For the lock acquisition below, there is not much
	// need to duplicate parsing of the same content/user/summary bundle, so try to avoid
	// blocking at all here.
	$dbw = $this->lb->getConnectionRef( DB_PRIMARY );
	if ( !$dbw->lock( $key, $fname, 0 ) ) {
		// De-duplicate requests on the same key
		return self::ERROR_BUSY;
	}
	// Held only so that the lock is released when this variable goes out of scope
	/** @noinspection PhpUnusedLocalVariableInspection */
	$unlocker = new ScopedCallback( static function () use ( $dbw, $key, $fname ) {
		$dbw->unlock( $key, $fname );
	} );

	$cutoffTime = time() - self::PRESUME_FRESH_TTL_SEC;

	// Reuse any freshly built matching edit stash cache
	$editInfo = $this->getStashValue( $key );
	if ( $editInfo && wfTimestamp( TS_UNIX, $editInfo->timestamp ) >= $cutoffTime ) {
		$alreadyCached = true;
	} else {
		$format = $content->getDefaultFormat();
		$editInfo = $page->prepareContentForEdit( $content, null, $user, $format, false );
		// Only touch the output if there is one; the check below reports ERROR_PARSE otherwise
		if ( $editInfo && $editInfo->output ) {
			$editInfo->output->setCacheTime( $editInfo->timestamp );
		}
		$alreadyCached = false;
	}

	$logContext = [ 'cachekey' => $key, 'title' => (string)$page ];

	if ( $editInfo && $editInfo->output ) {
		// Let extensions add ParserOutput metadata or warm other caches
		$legacyUser = $this->userFactory->newFromUserIdentity( $user );
		$this->hookRunner->onParserOutputStashForEdit(
			$page, $content, $editInfo->output, $summary, $legacyUser );

		if ( $alreadyCached ) {
			$logger->debug( "Parser output for key '{cachekey}' already cached.", $logContext );

			return self::ERROR_NONE;
		}

		$code = $this->storeStashValue(
			$key,
			$editInfo->pstContent,
			$editInfo->output,
			$editInfo->timestamp,
			$user
		);

		if ( $code === true ) {
			$logger->debug( "Cached parser output for key '{cachekey}'.", $logContext );

			return self::ERROR_NONE;
		} elseif ( $code === 'uncacheable' ) {
			$logger->info(
				"Uncacheable parser output for key '{cachekey}' [{code}].",
				$logContext + [ 'code' => $code ]
			);

			// Report the dedicated status rather than falling through to ERROR_PARSE
			return self::ERROR_UNCACHEABLE;
		} else {
			$logger->error(
				"Failed to cache parser output for key '{cachekey}'.",
				$logContext + [ 'code' => $code ]
			);

			return self::ERROR_CACHE;
		}
	}

	return self::ERROR_PARSE;
}
198
/**
 * Check that a prepared edit is in cache and still up-to-date.
 *
 * The cache is only consulted for user-initiated HTTP POST requests by non-bot
 * users. An entry older than PRESUME_FRESH_TTL_SEC is accepted only if the user
 * made no edits since it was stashed: anonymous users are checked by their last
 * recent change time, registered users by their edit count.
 *
 * @param PageIdentity $page
 * @param Content $content Content whose hash is part of the stash key
 * @param User $user
 * @return stdClass|bool The stashed edit info object, or false if there is no usable entry
 */
public function checkCache( PageIdentity $page, Content $content, User $user ) {
	if (
		// The context is not an HTTP POST request
		!$user->getRequest()->wasPosted() ||
		// The context is a CLI script or a job runner HTTP POST request
		$this->initiator !== self::INITIATOR_USER ||
		// The editor account is a known bot
		$user->isBot()
	) {
		// Avoid wasted queries and statsd pollution
		return false;
	}

	$logger = $this->logger;

	$key = $this->getStashKey( $page, $this->getContentHash( $content ), $user );
	$logContext = [
		'key' => $key,
		'title' => (string)$page,
		'user' => $user->getName()
	];

	$editInfo = $this->getAndWaitForStashValue( $key );
	if ( !is_object( $editInfo ) || !$editInfo->output ) {
		$this->incrStatsByContent( 'cache_misses.no_stash', $content );
		if ( $this->recentStashEntryCount( $user ) > 0 ) {
			$logger->info( "Empty cache for key '{key}' but not for user.", $logContext );
		} else {
			$logger->debug( "Empty cache for key '{key}'.", $logContext );
		}

		return false;
	}

	$age = time() - (int)wfTimestamp( TS_UNIX, $editInfo->output->getCacheTime() );
	$logContext['age'] = $age;

	$isCacheUsable = true;
	if ( $age <= self::PRESUME_FRESH_TTL_SEC ) {
		// Assume nothing changed in this time
		$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
		$logger->debug( "Timestamp-based cache hit for key '{key}'.", $logContext );
	} elseif ( $user->isAnon() ) {
		$lastEdit = $this->lastEditTime( $user );
		$cacheTime = $editInfo->output->getCacheTime();
		if ( $lastEdit < $cacheTime ) {
			// Logged-out user made no local upload/template edits in the meantime
			$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
			$logger->debug( "Edit check based cache hit for key '{key}'.", $logContext );
		} else {
			$isCacheUsable = false;
			$this->incrStatsByContent( 'cache_misses.proven_stale', $content );
			$logger->info( "Stale cache for key '{key}' due to outside edits.", $logContext );
		}
	} else {
		if ( $editInfo->edits === $this->userEditTracker->getUserEditCount( $user ) ) {
			// Logged-in user made no local upload/template edits in the meantime
			$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
			$logger->debug( "Edit count based cache hit for key '{key}'.", $logContext );
		} else {
			$isCacheUsable = false;
			$this->incrStatsByContent( 'cache_misses.proven_stale', $content );
			$logger->info( "Stale cache for key '{key}' due to outside edits.", $logContext );
		}
	}

	if ( !$isCacheUsable ) {
		return false;
	}

	if ( $editInfo->output->getFlag( 'vary-revision' ) ) {
		// This can be used for the initial parse, e.g. for filters or doUserEditContent(),
		// but a second parse will be triggered in doEditUpdates() no matter what
		$logger->info(
			"Cache for key '{key}' has vary-revision; post-insertion parse inevitable.",
			$logContext
		);
	} else {
		static $flagsMaybeReparse = [
			// Similar to the above if we didn't guess the ID correctly
			'vary-revision-id',
			// Similar to the above if we didn't guess the timestamp correctly
			'vary-revision-timestamp',
			// Similar to the above if we didn't guess the content correctly
			'vary-revision-sha1',
			// Similar to the above if we didn't guess page ID correctly
			'vary-page-id'
		];
		foreach ( $flagsMaybeReparse as $flag ) {
			if ( $editInfo->output->getFlag( $flag ) ) {
				$logger->debug(
					"Cache for key '{key}' has $flag; post-insertion parse possible.",
					$logContext
				);
			}
		}
	}

	return $editInfo;
}
319
/**
 * Bump a stash counter twice: once globally and once per content model.
 *
 * @param string $subkey Metric suffix, e.g. 'cache_misses.no_stash'
 * @param Content $content Content whose model names the per-model counter
 */
private function incrStatsByContent( $subkey, Content $content ) {
	// Overall counter, kept for backwards compatibility
	$overallMetric = "editstash.{$subkey}";
	$this->stats->increment( $overallMetric );

	$perModelMetric = "editstash_by_model.{$content->getModel()}.{$subkey}";
	$this->stats->increment( $perModelMetric );
}
328
/**
 * Fetch a stash entry, waiting on an in-progress stash request if there is none yet.
 *
 * @param string $key Stash key
 * @return stdClass|bool The entry as returned by getStashValue(), or false
 */
private function getAndWaitForStashValue( $key ) {
	$stashed = $this->getStashValue( $key );
	if ( $stashed ) {
		return $stashed;
	}

	$waitStart = microtime( true );
	// We ignore user aborts and keep parsing. Block on any prior parsing
	// so as to use its results and make use of the time spent parsing.
	// Skip this logic if there is no primary connection in case this method
	// is called on an HTTP GET request for some reason.
	$writerIndex = $this->lb->getWriterIndex();
	$conn = $this->lb->getAnyOpenConnection( $writerIndex );
	if ( $conn && $conn->lock( $key, __METHOD__, 30 ) ) {
		$stashed = $this->getStashValue( $key );
		$conn->unlock( $key, __METHOD__ );
	}

	// Record the time spent waiting, in milliseconds
	$elapsedSec = max( 0, microtime( true ) - $waitStart );
	$this->stats->timing( 'editstash.lock_wait_time', 1000 * $elapsedSec );

	return $stashed;
}
354
/**
 * Look up stashed input text by its hash.
 *
 * @param string $textHash Hash under which the text was stashed
 * @return mixed Whatever the cache returns for the text key
 */
public function fetchInputText( $textHash ) {
	return $this->cache->get(
		$this->cache->makeKey( 'stashedit', 'text', $textHash )
	);
}
364
/**
 * Stash input text under its hash for up to MAX_CACHE_TTL seconds.
 *
 * @param string $text Text to store
 * @param string $textHash Hash that identifies the text
 * @return mixed Result of the cache write
 */
public function stashInputText( $text, $textHash ) {
	$cacheKey = $this->cache->makeKey( 'stashedit', 'text', $textHash );
	$stored = $this->cache->set(
		$cacheKey,
		$text,
		self::MAX_CACHE_TTL,
		BagOStuff::WRITE_ALLOW_SEGMENTS
	);

	return $stored;
}
380
/**
 * Find the newest recentchanges timestamp attributed to the user's name.
 *
 * @param UserIdentity $user
 * @return string|null Result of wfTimestampOrNull() in TS_MW format
 */
private function lastEditTime( UserIdentity $user ) {
	$replica = $this->lb->getConnectionRef( DB_REPLICA );

	$query = $replica->newSelectQueryBuilder();
	$query->select( 'MAX(rc_timestamp)' )
		->from( 'recentchanges' )
		->join( 'actor', null, 'actor_id=rc_actor' )
		->where( [ 'actor_name' => $user->getName() ] )
		->caller( __METHOD__ );
	$newest = $query->fetchField();

	return wfTimestampOrNull( TS_MW, $newest );
}
398
/**
 * Get hash of the content, factoring in model/format.
 *
 * @param Content $content
 * @return string SHA-1 hex digest of model, default format and serialized text
 */
private function getContentHash( Content $content ) {
	$model = $content->getModel();
	$format = $content->getDefaultFormat();
	$serialized = $content->serialize( $format );

	// Newline-separated so that the three parts cannot run together
	return sha1( $model . "\n" . $format . "\n" . $serialized );
}
412
/**
 * Get the temporary prepared edit stash key for a user.
 *
 * @param PageIdentity $page
 * @param string $contentHash Hash from getContentHash()
 * @param UserIdentity $user
 * @return string Cache key combining page, content and user digests
 */
private function getStashKey( PageIdentity $page, $contentHash, UserIdentity $user ) {
	$pageDigest = md5( $page->getNamespace() . "\n" . $page->getDBkey() );
	// Account for user name related variables like signatures
	$userDigest = md5( $user->getId() . "\n" . $user->getName() );

	// $contentHash accounts for the edit model/text
	return $this->cache->makeKey( 'stashedit-info-v1', $pageDigest, $contentHash, $userDigest );
}
435
/**
 * Read a stash entry, accepting it only if it is an object carrying a ParserOutput.
 *
 * @param string $key Stash key
 * @return stdClass|bool The cached object, or false if it is missing or malformed
 */
private function getStashValue( $key ) {
	$cached = $this->cache->get( $key );
	$isUsable = is_object( $cached ) && $cached->output instanceof ParserOutput;

	return $isUsable ? $cached : false;
}
448
/**
 * Build a value to store in the cache based on the PST content and parser output.
 *
 * @param string $key Stash key
 * @param Content $pstContent Content stored in the entry as 'pstContent'
 * @param ParserOutput $parserOutput
 * @param string $timestamp Timestamp stored in the entry as 'timestamp'
 * @param UserIdentity $user
 * @return string|bool True on success, 'uncacheable' if the TTL is not positive,
 *   or 'store_error' if the cache write failed
 */
private function storeStashValue(
	$key,
	Content $pstContent,
	ParserOutput $parserOutput,
	$timestamp,
	UserIdentity $user
) {
	// If an item is renewed, mind the cache TTL determined by config and parser functions.
	// Cap the TTL to avoid extreme template/file staleness.
	$outputAge = time() - (int)wfTimestamp( TS_UNIX, $parserOutput->getCacheTime() );
	$lifetime = min( $parserOutput->getCacheExpiry() - $outputAge, self::MAX_CACHE_TTL );
	if ( $parserOutput->getFlag( 'user-signature' ) ) {
		// Avoid extremely stale user signature timestamps (T84843)
		$lifetime = min( $lifetime, self::MAX_SIGNATURE_TTL );
	}

	if ( $lifetime <= 0 ) {
		// Low TTL due to a tag, magic word, or signature?
		return 'uncacheable';
	}

	// The edit count is only recorded for registered users
	if ( $user->isRegistered() ) {
		$editCount = $this->userEditTracker->getUserEditCount( $user );
	} else {
		$editCount = null;
	}

	// Store only what is actually needed (T204742)
	$entry = (object)[
		'pstContent' => $pstContent,
		'output' => $parserOutput,
		'timestamp' => $timestamp,
		'edits' => $editCount,
	];

	if ( !$this->cache->set( $key, $entry, $lifetime, BagOStuff::WRITE_ALLOW_SEGMENTS ) ) {
		return 'store_error';
	}

	// These blobs can waste slots in low cardinality memcached slabs
	$this->pruneExcessStashedEntries( $user, $key );

	return true;
}
497
/**
 * Record a new stash key for the user, deleting the oldest entry if the list is full.
 *
 * @param UserIdentity $user
 * @param string $newKey Stash key that was just written
 */
private function pruneExcessStashedEntries( UserIdentity $user, $newKey ) {
	$listKey = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );

	$recentKeys = $this->cache->get( $listKey ) ?: [];
	$isFull = count( $recentKeys ) >= self::MAX_CACHE_RECENT;
	if ( $isFull ) {
		// Drop the oldest tracked entry from the list and from the cache
		$evictedKey = array_shift( $recentKeys );
		$this->cache->delete( $evictedKey, BagOStuff::WRITE_PRUNE_SEGMENTS );
	}

	array_push( $recentKeys, $newKey );
	$this->cache->set( $listKey, $recentKeys, 2 * self::MAX_CACHE_TTL );
}
514
/**
 * Count the stash keys currently tracked for the user.
 *
 * @param UserIdentity $user
 * @return int Number of keys in the user's recent-key list, 0 if there is none
 */
private function recentStashEntryCount( UserIdentity $user ) {
	$listKey = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );
	$recentKeys = $this->cache->get( $listKey );

	return $recentKeys ? count( $recentKeys ) : 0;
}
524}
wfTimestampOrNull( $outputtype=TS_UNIX, $ts=null)
Return a formatted timestamp, or null if input is null.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Class representing a cache/ephemeral data store.
Definition BagOStuff.php:86
getCacheExpiry()
Returns the number of seconds after which this object should expire.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookContainer for dispatch to extensions.
Class for managing stashed edits used by the page updater classes.
storeStashValue( $key, Content $pstContent, ParserOutput $parserOutput, $timestamp, UserIdentity $user)
Build a value to store in memcached based on the PST content and parser output.
__construct(BagOStuff $cache, ILoadBalancer $lb, LoggerInterface $logger, StatsdDataFactoryInterface $stats, UserEditTracker $userEditTracker, UserFactory $userFactory, HookContainer $hookContainer, $initiator)
pruneExcessStashedEntries(UserIdentity $user, $newKey)
parseAndCache(WikiPage $page, Content $content, UserIdentity $user, string $summary)
incrStatsByContent( $subkey, Content $content)
StatsdDataFactoryInterface $stats
ParserOutputStashForEditHook $hookRunner
getContentHash(Content $content)
Get hash of the content, factoring in model/format.
getStashKey(PageIdentity $page, $contentHash, UserIdentity $user)
Get the temporary prepared edit stash key for a user.
recentStashEntryCount(UserIdentity $user)
checkCache(PageIdentity $page, Content $content, User $user)
Check that a prepared edit is in cache and still up-to-date.
Track info about user edit counts and timings.
Creates User objects.
The User object encapsulates all of the user-specific settings (user_id, name, rights,...
Definition User.php:69
getRequest()
Get the WebRequest object to use with this object.
Definition User.php:3075
getName()
Get the user name, or the IP of an anonymous user.
Definition User.php:2116
isBot()
Definition User.php:2994
isAnon()
Get whether the user is anonymous.
Definition User.php:2986
Class representing a MediaWiki article and history.
Definition WikiPage.php:60
prepareContentForEdit(Content $content, RevisionRecord $revision=null, UserIdentity $user=null, $serialFormat=null, $useCache=true)
Prepare content which is about to be saved.
Base interface for content objects.
Definition Content.php:35
Interface for objects (potentially) representing an editable wiki page.
This is a hook handler interface, see docs/Hooks.md.
Interface for objects representing user identity.
Database cluster connection, tracking, load balancing, and transaction manager interface.
const DB_REPLICA
Definition defines.php:25
const DB_PRIMARY
Definition defines.php:27
$content
Definition router.php:76
return true
Definition router.php:92