MediaWiki master
PageEditStash.php
Go to the documentation of this file.
1<?php
21namespace MediaWiki\Storage;
22
35use Psr\Log\LoggerInterface;
36use stdClass;
39use Wikimedia\ScopedCallback;
41use WikiPage;
42
/** Cache backend (a BagOStuff) holding stashed edit info, stashed input text and per-user recent-key lists */
56 private $cache;
/** Connection provider (an IConnectionProvider) used for primary-DB named locks and replica queries */
58 private $dbProvider;
/** Logger (a LoggerInterface) used for stash hit/miss/failure messages */
60 private $logger;
/** StatsFactory used to emit cache-check counters and lock-wait timings */
62 private $stats;
/** HookRunner built in the constructor from the injected HookContainer */
64 private $hookRunner;
/** UserEditTracker used to read a user's edit count */
66 private $userEditTracker;
/** UserFactory used to turn a UserIdentity into a legacy user object */
68 private $userFactory;
/** WikiPageFactory used to build the page object handed to the stash hook */
70 private $wikiPageFactory;
/** Value passed to the constructor; checkCache() compares it against INITIATOR_USER (presumably always an INITIATOR_* constant) */
72 private $initiator;
73
/** Status codes describing the outcome of a stash attempt (see parseAndCache()) */
74 public const ERROR_NONE = 'stashed';
75 public const ERROR_PARSE = 'error_parse';
76 public const ERROR_CACHE = 'error_cache';
77 public const ERROR_UNCACHEABLE = 'uncacheable';
78 public const ERROR_BUSY = 'busy';
79
/** Age in seconds up to which a stashed entry is treated as fresh without further checks */
80 public const PRESUME_FRESH_TTL_SEC = 30;
/** Upper bound in seconds on the TTL of stashed edit info and stashed input text */
81 public const MAX_CACHE_TTL = 300; // 5 minutes
/** Upper bound in seconds on the TTL when the parser output carries the USER_SIGNATURE flag */
82 public const MAX_SIGNATURE_TTL = 60;
83
/** Number of recent stash keys kept per user name before the oldest one is deleted */
84 private const MAX_CACHE_RECENT = 2;
85
/** Initiator values: checkCache() only consults the stash when the initiator is INITIATOR_USER */
86 public const INITIATOR_USER = 1;
87 public const INITIATOR_JOB_OR_CLI = 2;
88
/**
 * Wire up the services this stash depends on.
 *
 * @param BagOStuff $cache Backend used for stashed edits, stashed text and recent-key lists
 * @param IConnectionProvider $dbProvider Source of primary/replica database connections
 * @param LoggerInterface $logger
 * @param StatsFactory $stats
 * @param UserEditTracker $userEditTracker
 * @param UserFactory $userFactory
 * @param WikiPageFactory $wikiPageFactory
 * @param HookContainer $hookContainer Wrapped in a HookRunner
 * @param mixed $initiator Compared against self::INITIATOR_USER by checkCache();
 *   presumably one of the self::INITIATOR_* constants
 */
public function __construct(
	BagOStuff $cache,
	IConnectionProvider $dbProvider,
	LoggerInterface $logger,
	StatsFactory $stats,
	UserEditTracker $userEditTracker,
	UserFactory $userFactory,
	WikiPageFactory $wikiPageFactory,
	HookContainer $hookContainer,
	$initiator
) {
	// Request context
	$this->initiator = $initiator;

	// Storage backends
	$this->cache = $cache;
	$this->dbProvider = $dbProvider;

	// Diagnostics
	$this->logger = $logger;
	$this->stats = $stats;

	// User/page helpers
	$this->userEditTracker = $userEditTracker;
	$this->userFactory = $userFactory;
	$this->wikiPageFactory = $wikiPageFactory;

	// Hooks are always invoked through a runner wrapping the container
	$this->hookRunner = new HookRunner( $hookContainer );
}
121
/**
 * Pre-parse the given content for a pending edit and stash the result in the cache.
 *
 * @param mixed $pageUpdater Object providing getPage(), setContent() and prepareUpdate();
 *   passing a WikiPage is deprecated since 1.42 and is converted via newPageUpdater()
 * @param Content $content Edit content
 * @param UserIdentity $user User the edit is prepared for
 * @param string $summary Edit summary, handed to the ParserOutputStashForEdit hook
 * @return string One of the self::ERROR_* constants:
 *   ERROR_NONE on success (or when a fresh entry already exists), ERROR_BUSY when another
 *   request holds the lock for the same key, ERROR_UNCACHEABLE when the output has no
 *   usable TTL, ERROR_CACHE when storing failed, ERROR_PARSE when there is no parser output
 */
public function parseAndCache( $pageUpdater, Content $content, UserIdentity $user, string $summary ) {
	$logger = $this->logger;

	if ( $pageUpdater instanceof WikiPage ) {
		wfDeprecated( __METHOD__ . ' with WikiPage instance', '1.42' );
		$pageUpdater = $pageUpdater->newPageUpdater( $user );
	}

	$page = $pageUpdater->getPage();
	$key = $this->getStashKey( $page, $this->getContentHash( $content ), $user );
	$fname = __METHOD__;

	// Use the primary DB to allow for fast blocking locks on the "save path" where this
	// value might actually be used to complete a page edit. If the edit submission request
	// happens before this edit stash request finishes, then the submission will block until
	// the stash request finishes parsing. For the lock acquisition below, there is not much
	// need to duplicate parsing of the same content/user/summary bundle, so try to avoid
	// blocking at all here (timeout of 0).
	$dbw = $this->dbProvider->getPrimaryDatabase();
	if ( !$dbw->lock( $key, $fname, 0 ) ) {
		// De-duplicate requests on the same key
		return self::ERROR_BUSY;
	}
	// Keep a reference for the rest of the method; the ScopedCallback runs the unlock
	// when $unlocker goes out of scope, covering every return path below.
	$unlocker = new ScopedCallback( static function () use ( $dbw, $key, $fname ) {
		$dbw->unlock( $key, $fname );
	} );

	$cutoffTime = time() - self::PRESUME_FRESH_TTL_SEC;

	// Reuse any freshly built matching edit stash cache
	$editInfo = $this->getStashValue( $key );
	if ( $editInfo && (int)wfTimestamp( TS_UNIX, $editInfo->timestamp ) >= $cutoffTime ) {
		$alreadyCached = true;
	} else {
		$pageUpdater->setContent( SlotRecord::MAIN, $content );

		$update = $pageUpdater->prepareUpdate( EDIT_INTERNAL ); // applies pre-safe transform
		$output = $update->getCanonicalParserOutput(); // causes content to be parsed
		$output->setCacheTime( $update->getRevision()->getTimestamp() );

		// emulate a cache value that kind of looks like a PreparedEdit, for use below
		$editInfo = (object)[
			'pstContent' => $update->getRawContent( SlotRecord::MAIN ),
			'output' => $output,
			'timestamp' => $output->getCacheTime()
		];

		$alreadyCached = false;
	}

	$logContext = [ 'cachekey' => $key, 'title' => (string)$page ];

	if ( $editInfo->output ) {
		// Let extensions add ParserOutput metadata or warm other caches
		$legacyUser = $this->userFactory->newFromUserIdentity( $user );
		$legacyPage = $this->wikiPageFactory->newFromTitle( $page );
		$this->hookRunner->onParserOutputStashForEdit(
			$legacyPage, $content, $editInfo->output, $summary, $legacyUser );

		if ( $alreadyCached ) {
			$logger->debug( "Parser output for key '{cachekey}' already cached.", $logContext );

			return self::ERROR_NONE;
		}

		$code = $this->storeStashValue(
			$key,
			$editInfo->pstContent,
			$editInfo->output,
			$editInfo->timestamp,
			$user
		);

		if ( $code === true ) {
			$logger->debug( "Cached parser output for key '{cachekey}'.", $logContext );

			return self::ERROR_NONE;
		} elseif ( $code === 'uncacheable' ) {
			$logger->info(
				"Uncacheable parser output for key '{cachekey}' [{code}].",
				$logContext + [ 'code' => $code ]
			);

			// Report the real reason instead of falling through to ERROR_PARSE
			return self::ERROR_UNCACHEABLE;
		} else {
			$logger->error(
				"Failed to cache parser output for key '{cachekey}'.",
				$logContext + [ 'code' => $code ]
			);

			return self::ERROR_CACHE;
		}
	}

	return self::ERROR_PARSE;
}
226
/**
 * Look up a stashed edit for this page/content/user and decide whether it is still usable.
 *
 * The lookup is skipped entirely (returning false) unless the request was POSTed, the
 * initiator is INITIATOR_USER and the user is not a bot. A stashed entry is accepted when:
 *  - it is at most PRESUME_FRESH_TTL_SEC seconds old, or
 *  - for unregistered users, their latest recent change predates the cache time, or
 *  - for registered users, their edit count equals the one recorded when stashing.
 *
 * @param PageIdentity $page
 * @param Content $content Edit content
 * @param UserIdentity $user
 * @return stdClass|false The stash info object (see storeStashValue() for its fields),
 *   or false when there is no usable entry
 */
public function checkCache( PageIdentity $page, Content $content, UserIdentity $user ) {
	$legacyUser = $this->userFactory->newFromUserIdentity( $user );
	if (
		// The context is not an HTTP POST request
		!$legacyUser->getRequest()->wasPosted() ||
		// The context is a CLI script or a job runner HTTP POST request
		$this->initiator !== self::INITIATOR_USER ||
		// The editor account is a known bot
		$legacyUser->isBot()
	) {
		// Avoid wasted queries and statsd pollution
		return false;
	}

	$logger = $this->logger;

	$key = $this->getStashKey( $page, $this->getContentHash( $content ), $user );
	$logContext = [
		'key' => $key,
		'title' => (string)$page,
		'user' => $user->getName()
	];

	// This may block on a concurrent parseAndCache() holding the lock for the same key
	$editInfo = $this->getAndWaitForStashValue( $key );
	if ( !is_object( $editInfo ) || !$editInfo->output ) {
		$this->incrCacheReadStats( 'miss', 'no_stash', $content );
		if ( $this->recentStashEntryCount( $user ) > 0 ) {
			$logger->info( "Empty cache for key '{key}' but not for user.", $logContext );
		} else {
			$logger->debug( "Empty cache for key '{key}'.", $logContext );
		}

		return false;
	}

	$age = time() - (int)wfTimestamp( TS_UNIX, $editInfo->output->getCacheTime() );
	$logContext['age'] = $age;

	$isCacheUsable = true;
	if ( $age <= self::PRESUME_FRESH_TTL_SEC ) {
		// Assume nothing changed in this time
		$this->incrCacheReadStats( 'hit', 'presumed_fresh', $content );
		$logger->debug( "Timestamp-based cache hit for key '{key}'.", $logContext );
	} elseif ( !$user->isRegistered() ) {
		$lastEdit = $this->lastEditTime( $user );
		$cacheTime = $editInfo->output->getCacheTime();
		if ( $lastEdit < $cacheTime ) {
			// Logged-out user made no local upload/template edits in the meantime
			$this->incrCacheReadStats( 'hit', 'presumed_fresh', $content );
			$logger->debug( "Edit check based cache hit for key '{key}'.", $logContext );
		} else {
			$isCacheUsable = false;
			$this->incrCacheReadStats( 'miss', 'proven_stale', $content );
			$logger->info( "Stale cache for key '{key}' due to outside edits.", $logContext );
		}
	} else {
		if ( $editInfo->edits === $this->userEditTracker->getUserEditCount( $user ) ) {
			// Logged-in user made no local upload/template edits in the meantime
			$this->incrCacheReadStats( 'hit', 'presumed_fresh', $content );
			$logger->debug( "Edit count based cache hit for key '{key}'.", $logContext );
		} else {
			$isCacheUsable = false;
			$this->incrCacheReadStats( 'miss', 'proven_stale', $content );
			// Same wording as the logged-out branch above (a space was missing before "due")
			$logger->info( "Stale cache for key '{key}' due to outside edits.", $logContext );
		}
	}

	if ( !$isCacheUsable ) {
		return false;
	}

	if ( $editInfo->output->getOutputFlag( ParserOutputFlags::VARY_REVISION ) ) {
		// This can be used for the initial parse, e.g. for filters or doUserEditContent(),
		// but a second parse will be triggered in doEditUpdates() no matter what
		$logger->info(
			"Cache for key '{key}' has vary-revision; post-insertion parse inevitable.",
			$logContext
		);
	} else {
		static $flagsMaybeReparse = [
			// Similar to the above if we didn't guess the ID correctly
			ParserOutputFlags::VARY_REVISION_ID,
			// Similar to the above if we didn't guess the timestamp correctly
			ParserOutputFlags::VARY_REVISION_TIMESTAMP,
			// Similar to the above if we didn't guess the content correctly
			ParserOutputFlags::VARY_REVISION_SHA1,
			// Similar to the above if we didn't guess page ID correctly
			ParserOutputFlags::VARY_PAGE_ID,
		];
		foreach ( $flagsMaybeReparse as $flag ) {
			if ( $editInfo->output->getOutputFlag( $flag ) ) {
				$logger->debug(
					"Cache for key '{key}' has $flag; post-insertion parse possible.",
					$logContext
				);
			}
		}
	}

	return $editInfo;
}
348
/**
 * Bump the cache-check counter for one lookup outcome.
 *
 * @param string $result Either 'hit' or 'miss' (the only keys of the statsd subtype map)
 * @param string $reason Label describing why the lookup hit or missed
 * @param Content $content Its model ID is used as a label and in the per-model statsd key
 */
private function incrCacheReadStats( $result, $reason, Content $content ) {
	// Legacy statsd key segment for each result
	$statsdSegments = [ 'miss' => 'cache_misses', 'hit' => 'cache_hits' ];
	$segment = $statsdSegments[ $result ];
	$model = $content->getModel();

	$statsdKeys = [
		"editstash.{$segment}.{$reason}",
		"editstash_by_model.{$model}.{$segment}.{$reason}",
	];

	$this->stats->getCounter( 'editstash_cache_checks_total' )
		->setLabel( 'reason', $reason )
		->setLabel( 'result', $result )
		->setLabel( 'model', $model )
		->copyToStatsdAt( $statsdKeys )
		->increment();
}
365
/**
 * Fetch the stash entry for a key, waiting for a concurrent writer if it is not there yet.
 *
 * On a miss, this takes the named lock for the key on the primary DB (waiting up to
 * 30 seconds), re-reads the cache once the lock is held, and records how long that took.
 *
 * @param string $key Stash cache key
 * @return stdClass|false Stash info, or false if still absent/invalid
 */
private function getAndWaitForStashValue( $key ) {
	$stashed = $this->getStashValue( $key );
	if ( $stashed ) {
		// Already available; no need to touch the lock
		return $stashed;
	}

	$fname = __METHOD__;
	$waitStart = microtime( true );

	// We ignore user aborts and keep parsing. Block on any prior parsing
	// so as to use its results and make use of the time spent parsing.
	$primary = $this->dbProvider->getPrimaryDatabase();
	$lockAcquired = $primary->lock( $key, $fname, 30 );
	if ( $lockAcquired ) {
		$stashed = $this->getStashValue( $key );
		$primary->unlock( $key, $fname );
	}

	$elapsedMs = 1000 * max( 0, microtime( true ) - $waitStart );
	$this->stats->getTiming( 'editstash_lock_wait_seconds' )
		->copyToStatsdAt( 'editstash.lock_wait_time' )
		->observe( $elapsedMs );

	return $stashed;
}
391
/**
 * Read back text previously saved with stashInputText().
 *
 * @param string $textHash Hash the text was stored under
 * @return mixed Whatever the cache backend returns for the key
 *   (presumably the text, or false on a miss)
 */
public function fetchInputText( $textHash ) {
	$cache = $this->cache;

	return $cache->get( $cache->makeKey( 'stashedit', 'text', $textHash ) );
}
401
/**
 * Save raw input text in the cache under its hash for at most MAX_CACHE_TTL seconds.
 *
 * @param string $text Text to store
 * @param string $textHash Hash identifying the text
 * @return mixed Result of the cache backend's set() call (presumably a success boolean)
 */
public function stashInputText( $text, $textHash ) {
	$cache = $this->cache;
	$stored = $cache->set(
		$cache->makeKey( 'stashedit', 'text', $textHash ),
		$text,
		self::MAX_CACHE_TTL,
		BagOStuff::WRITE_ALLOW_SEGMENTS
	);

	return $stored;
}
417
/**
 * Find the timestamp of the newest recentchanges row attributed to the user's name.
 *
 * @param UserIdentity $user
 * @return string|null TS_MW timestamp, or null when the query yields no timestamp
 */
private function lastEditTime( UserIdentity $user ) {
	$replica = $this->dbProvider->getReplicaDatabase();

	$latestChange = $replica->newSelectQueryBuilder()
		->select( 'MAX(rc_timestamp)' )
		->from( 'recentchanges' )
		->join( 'actor', null, 'actor_id=rc_actor' )
		->where( [ 'actor_name' => $user->getName() ] )
		->caller( __METHOD__ )
		->fetchField();

	return wfTimestampOrNull( TS_MW, $latestChange );
}
433
/**
 * Compute a SHA-1 digest over the content's model, default format and serialized form.
 *
 * @param Content $content
 * @return string Hex SHA-1 hash
 */
private function getContentHash( Content $content ) {
	$model = $content->getModel();
	$format = $content->getDefaultFormat();
	$serialized = $content->serialize( $format );

	// Newline-separated so the three parts cannot run into each other
	return sha1( $model . "\n" . $format . "\n" . $serialized );
}
447
/**
 * Build the cache key for a stashed edit from the page, the content hash and the user.
 *
 * @param PageIdentity $page
 * @param string $contentHash Result of getContentHash()
 * @param UserIdentity $user
 * @return string Cache key
 */
private function getStashKey( PageIdentity $page, $contentHash, UserIdentity $user ) {
	// Identifies the target page by namespace and DB key
	$pageDigest = md5( $page->getNamespace() . "\n" . $page->getDBkey() );
	// Account for user name related variables like signatures
	$userDigest = md5( $user->getId() . "\n" . $user->getName() );

	// $contentHash accounts for the edit model/text
	return $this->cache->makeKey( 'stashedit-info-v2', $pageDigest, $contentHash, $userDigest );
}
470
/**
 * Read and decode the stash entry stored under a key.
 *
 * @param string $key Stash cache key
 * @return stdClass|false Decoded stash info, or false when absent or not decodable
 */
private function getStashValue( $key ) {
	return $this->unserializeStashInfo( $this->cache->get( $key ) );
}
480
/**
 * Serialize the prepared edit and write it to the cache with a bounded TTL.
 *
 * @param string $key Stash cache key
 * @param Content $pstContent Content stored as the 'pstContent' field
 * @param ParserOutput $parserOutput Output stored as the 'output' field
 * @param string $timestamp Stored as the 'timestamp' field
 * @param UserIdentity $user Its current edit count is stored as the 'edits' field
 * @return string|true True on success, 'uncacheable' when the TTL is not positive,
 *   'store_error' when serialization or the cache write fails
 */
private function storeStashValue(
	$key,
	Content $pstContent,
	ParserOutput $parserOutput,
	$timestamp,
	UserIdentity $user
) {
	// If an item is renewed, mind the cache TTL determined by config and parser functions.
	// Put an upper limit on the TTL to avoid extreme template/file staleness.
	$now = time();
	$age = $now - (int)wfTimestamp( TS_UNIX, $parserOutput->getCacheTime() );
	$remaining = $parserOutput->getCacheExpiry() - $age;
	$ttl = min( $remaining, self::MAX_CACHE_TTL );

	// Avoid extremely stale user signature timestamps (T84843)
	$hasSignature = $parserOutput->getOutputFlag( ParserOutputFlags::USER_SIGNATURE );
	if ( $hasSignature ) {
		$ttl = min( $ttl, self::MAX_SIGNATURE_TTL );
	}

	if ( $ttl <= 0 ) {
		// low TTL due to a tag, magic word, or signature?
		return 'uncacheable';
	}

	// Store only what is actually needed later (T204742)
	$serial = $this->serializeStashInfo( (object)[
		'pstContent' => $pstContent,
		'output' => $parserOutput,
		'timestamp' => $timestamp,
		'edits' => $this->userEditTracker->getUserEditCount( $user ),
	] );
	if ( $serial === false ) {
		return 'store_error';
	}

	if ( !$this->cache->set( $key, $serial, $ttl, BagOStuff::WRITE_ALLOW_SEGMENTS ) ) {
		return 'store_error';
	}

	// These blobs can waste slots in low cardinality memcached slabs
	$this->pruneExcessStashedEntries( $user, $key );

	return true;
}
533
/**
 * Record a new stash key for the user and delete the oldest tracked entry once the
 * per-user list has reached MAX_CACHE_RECENT keys.
 *
 * @param UserIdentity $user
 * @param string $newKey Stash key that was just written
 */
private function pruneExcessStashedEntries( UserIdentity $user, $newKey ) {
	$cache = $this->cache;
	$listKey = $cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );

	$recentKeys = $cache->get( $listKey ) ?: [];
	if ( count( $recentKeys ) >= self::MAX_CACHE_RECENT ) {
		// Drop the oldest tracked key from the list and from the cache
		$evicted = array_shift( $recentKeys );
		$cache->delete( $evicted, BagOStuff::WRITE_ALLOW_SEGMENTS );
	}

	$recentKeys[] = $newKey;
	// The list is kept for twice the maximum TTL of a stash entry
	$cache->set( $listKey, $recentKeys, 2 * self::MAX_CACHE_TTL );
}
550
/**
 * Count the stash keys currently tracked for the user's name.
 *
 * @param UserIdentity $user
 * @return int Number of tracked keys, 0 when no list is cached
 */
private function recentStashEntryCount( UserIdentity $user ) {
	$listKey = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );
	$recentKeys = $this->cache->get( $listKey );

	// A missing (falsy) list counts as empty
	return $recentKeys ? count( $recentKeys ) : 0;
}
560
/**
 * Turn a stash info object into a string suitable for the cache.
 *
 * @param stdClass $stashInfo
 * @return string PHP-serialized representation
 */
private function serializeStashInfo( stdClass $stashInfo ) {
	// @todo: use JSON with ParserOutput and Content
	$blob = serialize( $stashInfo );

	return $blob;
}
565
/**
 * Decode a cached stash blob, accepting it only if it is an object whose 'output'
 * member is a ParserOutput.
 *
 * @param mixed $serial Raw cache value
 * @return stdClass|false Decoded stash info, or false for anything else
 */
private function unserializeStashInfo( $serial ) {
	if ( !is_string( $serial ) ) {
		// Cache miss or unexpected value type
		return false;
	}

	// @todo: use JSON with ParserOutput and Content
	$decoded = unserialize( $serial );
	if ( !is_object( $decoded ) ) {
		return false;
	}

	return ( $decoded->output instanceof ParserOutput ) ? $decoded : false;
}
577}
const EDIT_INTERNAL
Definition Defines.php:134
wfTimestampOrNull( $outputtype=TS_UNIX, $ts=null)
Return a formatted timestamp, or null if input is null.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Service for creating WikiPage objects.
ParserOutput is a rendering of a Content object or a message.
Value object representing a content slot associated with a page revision.
Manage the pre-emptive page parsing for edits to wiki pages.
checkCache(PageIdentity $page, Content $content, UserIdentity $user)
Check that a prepared edit is in cache and still up-to-date.
parseAndCache( $pageUpdater, Content $content, UserIdentity $user, string $summary)
__construct(BagOStuff $cache, IConnectionProvider $dbProvider, LoggerInterface $logger, StatsFactory $stats, UserEditTracker $userEditTracker, UserFactory $userFactory, WikiPageFactory $wikiPageFactory, HookContainer $hookContainer, $initiator)
Track info about user edit counts and timings.
Creates User objects.
Base representation for an editable wiki page.
Definition WikiPage.php:85
Abstract class for any ephemeral data store.
Definition BagOStuff.php:89
This is the primary interface for validating metrics definitions, caching defined metrics,...
Base interface for representing page content.
Definition Content.php:39
getModel()
Returns the ID of the content model used by this Content object.
Interface for objects (potentially) representing an editable wiki page.
This is a hook handler interface, see docs/Hooks.md.
Interface for objects representing user identity.
isRegistered()
This must be equivalent to getId() != 0 and is provided for code readability.
Provide primary and replica IDatabase connections.