MediaWiki REL1_39
PageEditStash.php
Go to the documentation of this file.
1<?php
21namespace MediaWiki\Storage;
22
23use BagOStuff;
24use Content;
25use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
36use ParserOutput;
37use Psr\Log\LoggerInterface;
38use stdClass;
40use Wikimedia\ScopedCallback;
41use WikiPage;
42
/** @var BagOStuff Cache holding the stashed edit info and the stashed input text */
private $cache;

/** @var ILoadBalancer Source of the DB connections used for locking and lookups */
private $lb;

/** @var LoggerInterface */
private $logger;

/** @var StatsdDataFactoryInterface */
private $stats;

/** @var HookRunner Built from the HookContainer handed to the constructor */
private $hookRunner;

/** @var UserEditTracker */
private $userEditTracker;

/** @var UserFactory */
private $userFactory;

/** @var WikiPageFactory */
private $wikiPageFactory;

/** @var int Expected to be one of the self::INITIATOR_* constants */
private $initiator;
72
// Status codes returned by parseAndCache()
public const ERROR_NONE = 'stashed';
public const ERROR_PARSE = 'error_parse';
public const ERROR_CACHE = 'error_cache';
public const ERROR_UNCACHEABLE = 'uncacheable';
public const ERROR_BUSY = 'busy';

/** Age (in seconds) up to which a stashed entry is used without further staleness checks */
public const PRESUME_FRESH_TTL_SEC = 30;
/** Upper bound (in seconds) for the TTL of stashed entries; 5 minutes */
public const MAX_CACHE_TTL = 300;
/** Upper bound (in seconds) for the TTL of stashed output flagged as containing a user signature */
public const MAX_SIGNATURE_TTL = 60;

/** Number of stash keys tracked per user before the oldest one gets pruned */
private const MAX_CACHE_RECENT = 2;

// Possible values for the $initiator constructor argument
public const INITIATOR_USER = 1;
public const INITIATOR_JOB_OR_CLI = 2;
87
/**
 * Wire up the services this stash depends on.
 *
 * @param BagOStuff $cache
 * @param ILoadBalancer $lb
 * @param LoggerInterface $logger
 * @param StatsdDataFactoryInterface $stats
 * @param UserEditTracker $userEditTracker
 * @param UserFactory $userFactory
 * @param WikiPageFactory $wikiPageFactory
 * @param HookContainer $hookContainer Wrapped into a HookRunner
 * @param int $initiator Expected to be one of the self::INITIATOR_* constants
 */
public function __construct(
	BagOStuff $cache,
	ILoadBalancer $lb,
	LoggerInterface $logger,
	StatsdDataFactoryInterface $stats,
	UserEditTracker $userEditTracker,
	UserFactory $userFactory,
	WikiPageFactory $wikiPageFactory,
	HookContainer $hookContainer,
	$initiator
) {
	// Who triggered the request; consulted by checkCache()
	$this->initiator = $initiator;

	// Storage and database access
	$this->cache = $cache;
	$this->lb = $lb;

	// Diagnostics
	$this->logger = $logger;
	$this->stats = $stats;

	// User and page services
	$this->userEditTracker = $userEditTracker;
	$this->userFactory = $userFactory;
	$this->wikiPageFactory = $wikiPageFactory;

	// Hooks are always invoked through a runner
	$this->hookRunner = new HookRunner( $hookContainer );
}
120
/**
 * Parse the given content for a page and keep the result in the edit stash.
 *
 * A fresh enough entry already stashed under the same page/content/user key is
 * reused instead of parsing again.
 *
 * @param WikiPage|object $pageUpdater A page updater; a WikiPage is still accepted and
 *   converted via WikiPage::newPageUpdater()
 * @param Content $content Content to prepare and parse
 * @param UserIdentity $user User the edit is prepared for
 * @param string $summary Edit summary, passed on to the ParserOutputStashForEdit hook
 * @return string One of the self::ERROR_* constants:
 *   - ERROR_BUSY if another request holds the lock for the same key
 *   - ERROR_NONE if the output was stashed or a fresh entry already existed
 *   - ERROR_UNCACHEABLE if the output may not be cached at all
 *   - ERROR_CACHE if writing to the cache failed
 *   - ERROR_PARSE if no parser output is available
 */
public function parseAndCache( $pageUpdater, Content $content, UserIdentity $user, string $summary ) {
	$logger = $this->logger;

	if ( $pageUpdater instanceof WikiPage ) {
		// TODO: Trigger deprecation warning once extensions have been fixed.
		//       Or better, create PageUpdater::prepareAndStash and deprecate this method.
		$pageUpdater = $pageUpdater->newPageUpdater( $user );
	}

	$page = $pageUpdater->getPage();
	$key = $this->getStashKey( $page, $this->getContentHash( $content ), $user );
	$fname = __METHOD__;

	// Use the primary DB to allow for fast blocking locks on the "save path" where this
	// value might actually be used to complete a page edit. If the edit submission request
	// happens before this edit stash requests finishes, then the submission will block until
	// the stash request finishes parsing. For the lock acquisition below, there is not much
	// need to duplicate parsing of the same content/user/summary bundle, so try to avoid
	// blocking at all here.
	$dbw = $this->lb->getConnectionRef( DB_PRIMARY );
	if ( !$dbw->lock( $key, $fname, 0 ) ) {
		// De-duplicate requests on the same key
		return self::ERROR_BUSY;
	}
	// Never read; it is kept so that the lock is expected to be released through
	// ScopedCallback once this variable goes out of scope, on every return path.
	/** @noinspection PhpUnusedLocalVariableInspection */
	$unlocker = new ScopedCallback( static function () use ( $dbw, $key, $fname ) {
		$dbw->unlock( $key, $fname );
	} );

	$cutoffTime = time() - self::PRESUME_FRESH_TTL_SEC;

	// Reuse any freshly built matching edit stash cache
	$editInfo = $this->getStashValue( $key );
	if ( $editInfo && (int)wfTimestamp( TS_UNIX, $editInfo->timestamp ) >= $cutoffTime ) {
		$alreadyCached = true;
	} else {
		$pageUpdater->setContent( SlotRecord::MAIN, $content );

		$update = $pageUpdater->prepareUpdate( EDIT_INTERNAL ); // applies pre-save transform
		$output = $update->getCanonicalParserOutput(); // causes content to be parsed
		$output->setCacheTime( $update->getRevision()->getTimestamp() );

		// emulate a cache value that kind of looks like a PreparedEdit, for use below
		$editInfo = (object)[
			'pstContent' => $update->getRawContent( SlotRecord::MAIN ),
			'output' => $output,
			'timestamp' => $output->getCacheTime()
		];

		$alreadyCached = false;
	}

	$logContext = [ 'cachekey' => $key, 'title' => (string)$page ];

	if ( $editInfo->output ) {
		// Let extensions add ParserOutput metadata or warm other caches
		$legacyUser = $this->userFactory->newFromUserIdentity( $user );
		$legacyPage = $this->wikiPageFactory->newFromTitle( $page );
		$this->hookRunner->onParserOutputStashForEdit(
			$legacyPage, $content, $editInfo->output, $summary, $legacyUser );

		if ( $alreadyCached ) {
			$logger->debug( "Parser output for key '{cachekey}' already cached.", $logContext );

			return self::ERROR_NONE;
		}

		$code = $this->storeStashValue(
			$key,
			$editInfo->pstContent,
			$editInfo->output,
			$editInfo->timestamp,
			$user
		);

		if ( $code === true ) {
			$logger->debug( "Cached parser output for key '{cachekey}'.", $logContext );

			return self::ERROR_NONE;
		} elseif ( $code === 'uncacheable' ) {
			$logger->info(
				"Uncacheable parser output for key '{cachekey}' [{code}].",
				$logContext + [ 'code' => $code ]
			);

			// Report the uncacheable state as such instead of falling through
			// to the generic parse error at the end of the method.
			return self::ERROR_UNCACHEABLE;
		} else {
			$logger->error(
				"Failed to cache parser output for key '{cachekey}'.",
				$logContext + [ 'code' => $code ]
			);

			return self::ERROR_CACHE;
		}
	}

	return self::ERROR_PARSE;
}
226
/**
 * Check that a prepared edit is in cache and still up-to-date.
 *
 * The cache is only consulted for POST requests initiated by a user
 * (self::INITIATOR_USER) whose account is not a bot. A stashed entry is accepted when
 * one of the following holds:
 *  - its parser output is at most PRESUME_FRESH_TTL_SEC seconds old;
 *  - the user is not registered and their latest recent change predates the output;
 *  - the user is registered and their edit count equals the one stored with the entry.
 *
 * @param PageIdentity $page Page being edited
 * @param Content $content Content about to be saved
 * @param UserIdentity $user User performing the edit
 * @return stdClass|false The stashed edit info object, or false if there is no usable entry
 */
public function checkCache( PageIdentity $page, Content $content, UserIdentity $user ) {
	$legacyUser = $this->userFactory->newFromUserIdentity( $user );
	if (
		// The context is not an HTTP POST request
		!$legacyUser->getRequest()->wasPosted() ||
		// The context is a CLI script or a job runner HTTP POST request
		$this->initiator !== self::INITIATOR_USER ||
		// The editor account is a known bot
		$legacyUser->isBot()
	) {
		// Avoid wasted queries and statsd pollution
		return false;
	}

	$logger = $this->logger;

	$key = $this->getStashKey( $page, $this->getContentHash( $content ), $user );
	$logContext = [
		'key' => $key,
		'title' => (string)$page,
		'user' => $user->getName()
	];

	$editInfo = $this->getAndWaitForStashValue( $key );
	if ( !is_object( $editInfo ) || !$editInfo->output ) {
		$this->incrStatsByContent( 'cache_misses.no_stash', $content );
		if ( $this->recentStashEntryCount( $user ) > 0 ) {
			// The user did stash something recently, just not under this key
			$logger->info( "Empty cache for key '{key}' but not for user.", $logContext );
		} else {
			$logger->debug( "Empty cache for key '{key}'.", $logContext );
		}

		return false;
	}

	$age = time() - (int)wfTimestamp( TS_UNIX, $editInfo->output->getCacheTime() );
	$logContext['age'] = $age;

	$isCacheUsable = true;
	if ( $age <= self::PRESUME_FRESH_TTL_SEC ) {
		// Assume nothing changed in this time
		$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
		$logger->debug( "Timestamp-based cache hit for key '{key}'.", $logContext );
	} elseif ( !$user->isRegistered() ) {
		$lastEdit = $this->lastEditTime( $user );
		$cacheTime = $editInfo->output->getCacheTime();
		if ( $lastEdit < $cacheTime ) {
			// Logged-out user made no local upload/template edits in the meantime
			$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
			$logger->debug( "Edit check based cache hit for key '{key}'.", $logContext );
		} else {
			$isCacheUsable = false;
			$this->incrStatsByContent( 'cache_misses.proven_stale', $content );
			$logger->info( "Stale cache for key '{key}' due to outside edits.", $logContext );
		}
	} else {
		if ( $editInfo->edits === $this->userEditTracker->getUserEditCount( $user ) ) {
			// Logged-in user made no local upload/template edits in the meantime
			$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
			$logger->debug( "Edit count based cache hit for key '{key}'.", $logContext );
		} else {
			$isCacheUsable = false;
			$this->incrStatsByContent( 'cache_misses.proven_stale', $content );
			// Same wording as the logged-out branch above (a space was missing after the key)
			$logger->info( "Stale cache for key '{key}' due to outside edits.", $logContext );
		}
	}

	if ( !$isCacheUsable ) {
		return false;
	}

	if ( $editInfo->output->getOutputFlag( ParserOutputFlags::VARY_REVISION ) ) {
		// This can be used for the initial parse, e.g. for filters or doUserEditContent(),
		// but a second parse will be triggered in doEditUpdates() no matter what
		$logger->info(
			"Cache for key '{key}' has vary-revision; post-insertion parse inevitable.",
			$logContext
		);
	} else {
		static $flagsMaybeReparse = [
			// Similar to the above if we didn't guess the ID correctly
			ParserOutputFlags::VARY_REVISION_ID,
			// Similar to the above if we didn't guess the timestamp correctly
			ParserOutputFlags::VARY_REVISION_TIMESTAMP,
			// Similar to the above if we didn't guess the content correctly
			ParserOutputFlags::VARY_REVISION_SHA1,
			// Similar to the above if we didn't guess page ID correctly
			ParserOutputFlags::VARY_PAGE_ID,
		];
		foreach ( $flagsMaybeReparse as $flag ) {
			if ( $editInfo->output->getOutputFlag( $flag ) ) {
				$logger->debug(
					"Cache for key '{key}' has $flag; post-insertion parse possible.",
					$logContext
				);
			}
		}
	}

	return $editInfo;
}
348
/**
 * Bump a stash counter, both overall and split by content model.
 *
 * @param string $subkey Metric name below the "editstash" prefixes
 * @param Content $content Content whose model names the per-model metric
 */
private function incrStatsByContent( $subkey, Content $content ) {
	// Overall counter, kept for backwards compatibility
	$overallMetric = "editstash.{$subkey}";
	$this->stats->increment( $overallMetric );

	// Per-model counter
	$model = $content->getModel();
	$this->stats->increment( "editstash_by_model.{$model}.{$subkey}" );
}
357
/**
 * Fetch a stash entry, waiting for a concurrent stash request on the same key if needed.
 *
 * @param string $key Stash key, as built by getStashKey()
 * @return stdClass|false The stash info object, or false if none was found
 */
private function getAndWaitForStashValue( $key ) {
	$stashed = $this->getStashValue( $key );
	if ( $stashed ) {
		// Already available, no waiting (and no timing metric) needed
		return $stashed;
	}

	$waitStart = microtime( true );

	// We ignore user aborts and keep parsing. Block on any prior parsing
	// so as to use its results and make use of the time spent parsing.
	// Skip this logic if there is no primary connection in case this method
	// is called on an HTTP GET request for some reason.
	$writerIndex = $this->lb->getWriterIndex();
	$dbw = $this->lb->getAnyOpenConnection( $writerIndex );
	if ( $dbw && $dbw->lock( $key, __METHOD__, 30 ) ) {
		$stashed = $this->getStashValue( $key );
		$dbw->unlock( $key, __METHOD__ );
	}

	$waitedSeconds = max( 0, microtime( true ) - $waitStart );
	$this->stats->timing( 'editstash.lock_wait_time', 1000 * $waitedSeconds );

	return $stashed;
}
383
/**
 * Look up input text previously stored with stashInputText().
 *
 * @param string $textHash Hash the text was stashed under
 * @return mixed Whatever the cache holds for that hash
 */
public function fetchInputText( $textHash ) {
	return $this->cache->get(
		$this->cache->makeKey( 'stashedit', 'text', $textHash )
	);
}
393
/**
 * Keep input text in the cache for up to MAX_CACHE_TTL seconds.
 *
 * @param string $text Text to store
 * @param string $textHash Hash under which fetchInputText() can find the text again
 * @return mixed Result of the cache write (presumably a success boolean)
 */
public function stashInputText( $text, $textHash ) {
	$cache = $this->cache;
	$textKey = $cache->makeKey( 'stashedit', 'text', $textHash );
	$stored = $cache->set( $textKey, $text, self::MAX_CACHE_TTL, BagOStuff::WRITE_ALLOW_SEGMENTS );

	return $stored;
}
409
/**
 * Find the timestamp of the user's most recent entry in the recentchanges table.
 *
 * @param UserIdentity $user User, matched by name through the actor table
 * @return string|null TS_MW timestamp; presumably null when there is no matching row
 */
private function lastEditTime( UserIdentity $user ) {
	$dbr = $this->lb->getConnectionRef( DB_REPLICA );

	$latestChangeQuery = $dbr->newSelectQueryBuilder()
		->select( 'MAX(rc_timestamp)' )
		->from( 'recentchanges' )
		->join( 'actor', null, 'actor_id=rc_actor' )
		->where( [ 'actor_name' => $user->getName() ] )
		->caller( __METHOD__ );
	$latestChange = $latestChangeQuery->fetchField();

	return wfTimestampOrNull( TS_MW, $latestChange );
}
427
/**
 * Compute the hash that identifies a piece of content in stash keys.
 *
 * The hash covers the content model, the default format and the content
 * serialized in that format, joined by newlines.
 *
 * @param Content $content
 * @return string SHA-1 hash as a hex string
 */
private function getContentHash( Content $content ) {
	$model = $content->getModel();
	$format = $content->getDefaultFormat();
	$serialized = $content->serialize( $content->getDefaultFormat() );

	return sha1( "{$model}\n{$format}\n{$serialized}" );
}
441
/**
 * Build the cache key for the stash entry of a page/content/user combination.
 *
 * @param PageIdentity $page Page, identified by namespace and DB key
 * @param string $contentHash Hash from getContentHash()
 * @param UserIdentity $user User, identified by ID and name
 * @return string Cache key
 */
private function getStashKey( PageIdentity $page, $contentHash, UserIdentity $user ) {
	$pageHash = md5( "{$page->getNamespace()}\n{$page->getDBkey()}" );
	// Account for user name related variables like signatures
	$userHash = md5( "{$user->getId()}\n{$user->getName()}" );

	// $contentHash accounts for the edit model/text
	return $this->cache->makeKey( 'stashedit-info-v1', $pageHash, $contentHash, $userHash );
}
464
/**
 * Read a stash entry from the cache and make sure it has the expected shape.
 *
 * @param string $key Stash key
 * @return stdClass|false The cached object if it carries a ParserOutput in its
 *   'output' field, false otherwise
 */
private function getStashValue( $key ) {
	$cached = $this->cache->get( $key );

	// Reject anything that is not an object holding real parser output
	if ( !is_object( $cached ) || !( $cached->output instanceof ParserOutput ) ) {
		return false;
	}

	return $cached;
}
477
/**
 * Write a stash entry to the cache with a suitably capped TTL.
 *
 * @param string $key Stash key
 * @param Content $pstContent Content stored in the 'pstContent' field of the entry
 * @param ParserOutput $parserOutput Output stored in the 'output' field of the entry
 * @param string $timestamp Timestamp stored in the 'timestamp' field of the entry
 * @param UserIdentity $user User the entry belongs to
 * @return string|bool True on success, 'uncacheable' if the resulting TTL is not
 *   positive, 'store_error' if the cache write failed
 */
private function storeStashValue(
	$key,
	Content $pstContent,
	ParserOutput $parserOutput,
	$timestamp,
	UserIdentity $user
) {
	// If an item is renewed, mind the cache TTL determined by config and parser functions.
	// Put an upper limit on the TTL to avoid extreme template/file staleness.
	$outputAge = time() - (int)wfTimestamp( TS_UNIX, $parserOutput->getCacheTime() );
	$remainingTtl = $parserOutput->getCacheExpiry() - $outputAge;
	$ttl = min( $remainingTtl, self::MAX_CACHE_TTL );

	// Avoid extremely stale user signature timestamps (T84843)
	if ( $parserOutput->getOutputFlag( ParserOutputFlags::USER_SIGNATURE ) ) {
		$ttl = min( $ttl, self::MAX_SIGNATURE_TTL );
	}

	if ( $ttl <= 0 ) {
		// low TTL due to a tag, magic word, or signature?
		return 'uncacheable';
	}

	// The edit count is only recorded for registered users
	if ( $user->isRegistered() ) {
		$editCount = $this->userEditTracker->getUserEditCount( $user );
	} else {
		$editCount = null;
	}

	// Store what is actually needed and split the output into another key (T204742)
	$stashInfo = (object)[
		'pstContent' => $pstContent,
		'output' => $parserOutput,
		'timestamp' => $timestamp,
		'edits' => $editCount,
	];

	if ( !$this->cache->set( $key, $stashInfo, $ttl, BagOStuff::WRITE_ALLOW_SEGMENTS ) ) {
		return 'store_error';
	}

	// These blobs can waste slots in low cardinality memcached slabs
	$this->pruneExcessStashedEntries( $user, $key );

	return true;
}
526
/**
 * Record a newly stashed key for the user and evict the oldest tracked entry
 * once MAX_CACHE_RECENT keys are tracked.
 *
 * @param UserIdentity $user User whose list of recent stash keys gets updated
 * @param string $newKey Key of the stash entry that was just written
 */
private function pruneExcessStashedEntries( UserIdentity $user, $newKey ) {
	$key = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );

	$keyList = $this->cache->get( $key ) ?: [];
	// A renewed entry may already be tracked. Forget its old position first so that
	// the value which was just written is never picked as the "oldest" one and
	// deleted right away, and so that the list holds no duplicates.
	$keyList = array_values( array_diff( $keyList, [ $newKey ] ) );
	if ( count( $keyList ) >= self::MAX_CACHE_RECENT ) {
		$oldestKey = array_shift( $keyList );
		$this->cache->delete( $oldestKey, BagOStuff::WRITE_PRUNE_SEGMENTS );
	}

	$keyList[] = $newKey;
	// Keep the list around for twice as long as any single stash entry can live
	$this->cache->set( $key, $keyList, 2 * self::MAX_CACHE_TTL );
}
543
/**
 * Count the stash keys currently tracked for a user.
 *
 * @param UserIdentity $user
 * @return int Number of tracked keys; 0 if no list is cached
 */
private function recentStashEntryCount( UserIdentity $user ) {
	$listKey = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );
	$trackedKeys = $this->cache->get( $listKey );

	return count( $trackedKeys ? $trackedKeys : [] );
}
553}
const EDIT_INTERNAL
Definition Defines.php:133
wfTimestampOrNull( $outputtype=TS_UNIX, $ts=null)
Return a formatted timestamp, or null if input is null.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Class representing a cache/ephemeral data store.
Definition BagOStuff.php:85
getCacheExpiry()
Returns the number of seconds after which this object should expire.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Service for creating WikiPage objects.
Value object representing a content slot associated with a page revision.
Manage the pre-emptive page parsing for edits to wiki pages.
checkCache(PageIdentity $page, Content $content, UserIdentity $user)
Check that a prepared edit is in cache and still up-to-date.
parseAndCache( $pageUpdater, Content $content, UserIdentity $user, string $summary)
__construct(BagOStuff $cache, ILoadBalancer $lb, LoggerInterface $logger, StatsdDataFactoryInterface $stats, UserEditTracker $userEditTracker, UserFactory $userFactory, WikiPageFactory $wikiPageFactory, HookContainer $hookContainer, $initiator)
Track info about user edit counts and timings.
Creates User objects.
getOutputFlag(string $name)
Provides a uniform interface to various boolean flags stored in the ParserOutput.
Base representation for an editable wiki page.
Definition WikiPage.php:62
Base interface for content objects.
Definition Content.php:35
Interface for objects (potentially) representing an editable wiki page.
This is a hook handler interface, see docs/Hooks.md.
Interface for objects representing user identity.
Create and track the database connections and transactions for a given database cluster.
$cache
Definition mcc.php:33
const DB_REPLICA
Definition defines.php:26
const DB_PRIMARY
Definition defines.php:28
$content
Definition router.php:76
return true
Definition router.php:92