MediaWiki master
PageEditStash.php
Go to the documentation of this file.
1<?php
21namespace MediaWiki\Storage;
22
23use Content;
35use Psr\Log\LoggerInterface;
36use stdClass;
39use Wikimedia\ScopedCallback;
41use WikiPage;
42
/** @var BagOStuff Cache backend holding stashed edit data and the per-user recent-key lists */
56 private $cache;
/** @var IConnectionProvider Source of primary/replica DB handles (used for named locks and queries) */
58 private $dbProvider;
/** @var LoggerInterface */
60 private $logger;
/** @var StatsFactory Metrics factory used for cache hit/miss counters and lock wait timings */
62 private $stats;
/** @var HookRunner Built in the constructor from the injected hook container */
64 private $hookRunner;
/** @var UserEditTracker Supplies user edit counts, used to detect edits made since stashing */
66 private $userEditTracker;
/** @var UserFactory */
68 private $userFactory;
/** @var WikiPageFactory */
70 private $wikiPageFactory;
/** @var int Presumably one of the self::INITIATOR_* constants; compared against INITIATOR_USER in checkCache() */
72 private $initiator;
73
// Status codes describing the outcome of a stash attempt.
// ERROR_NONE: the parser output was stashed, or a fresh matching entry already existed.
74 public const ERROR_NONE = 'stashed';
// ERROR_PARSE: no parser output was available to stash.
75 public const ERROR_PARSE = 'error_parse';
// ERROR_CACHE: writing the stash entry to the cache failed.
76 public const ERROR_CACHE = 'error_cache';
// ERROR_UNCACHEABLE: intended for output whose cache TTL is too low to be stashed.
77 public const ERROR_UNCACHEABLE = 'uncacheable';
// ERROR_BUSY: another request holds the lock for the same stash key.
78 public const ERROR_BUSY = 'busy';
79
// Age in seconds up to which a stashed entry is presumed fresh without further checks.
80 public const PRESUME_FRESH_TTL_SEC = 30;
// Upper bound, in seconds, on the TTL of stashed values.
81 public const MAX_CACHE_TTL = 300; // 5 minutes
// Upper bound, in seconds, on the TTL when the parser output has the USER_SIGNATURE flag.
82 public const MAX_SIGNATURE_TTL = 60;
83
// Number of stash keys remembered per user before the oldest one is deleted.
84 private const MAX_CACHE_RECENT = 2;
85
// Request initiator types; checkCache() only consults the stash for INITIATOR_USER.
86 public const INITIATOR_USER = 1;
87 public const INITIATOR_JOB_OR_CLI = 2;
88
/**
 * Store the injected collaborators and remember what kind of request is running.
 *
 * @param BagOStuff $cache Cache used for stashed edits
 * @param IConnectionProvider $dbProvider
 * @param LoggerInterface $logger
 * @param StatsFactory $stats
 * @param UserEditTracker $userEditTracker
 * @param UserFactory $userFactory
 * @param WikiPageFactory $wikiPageFactory
 * @param HookContainer $hookContainer Wrapped in a HookRunner; the container itself is not kept
 * @param int $initiator Presumably one of the self::INITIATOR_* constants
 */
public function __construct(
	BagOStuff $cache,
	IConnectionProvider $dbProvider,
	LoggerInterface $logger,
	StatsFactory $stats,
	UserEditTracker $userEditTracker,
	UserFactory $userFactory,
	WikiPageFactory $wikiPageFactory,
	HookContainer $hookContainer,
	$initiator
) {
	// Request context
	$this->initiator = $initiator;

	// Hooks are always dispatched through a runner around the container
	$this->hookRunner = new HookRunner( $hookContainer );

	// Page and user services
	$this->wikiPageFactory = $wikiPageFactory;
	$this->userFactory = $userFactory;
	$this->userEditTracker = $userEditTracker;

	// Observability
	$this->stats = $stats;
	$this->logger = $logger;

	// Storage
	$this->dbProvider = $dbProvider;
	$this->cache = $cache;
}
121
/**
 * Parse the given content ahead of the actual save and stash the result in the cache.
 *
 * A non-blocking named lock on the stash key de-duplicates concurrent requests for the
 * same page/content/user bundle. If a sufficiently fresh entry already exists it is reused
 * and nothing is written.
 *
 * @param mixed $pageUpdater Page updater for the target page. Passing a WikiPage is
 *   deprecated since 1.42; it is converted via WikiPage::newPageUpdater().
 * @param Content $content Edit content
 * @param UserIdentity $user
 * @param string $summary Edit summary, forwarded to the ParserOutputStashForEdit hook
 * @return string One of the self::ERROR_* constants:
 *   - ERROR_NONE: output was stashed, or a fresh entry already existed
 *   - ERROR_BUSY: another request holds the lock for this key
 *   - ERROR_UNCACHEABLE: the output's TTL is too low to stash
 *   - ERROR_CACHE: the cache write failed
 *   - ERROR_PARSE: no parser output was produced
 */
public function parseAndCache( $pageUpdater, Content $content, UserIdentity $user, string $summary ) {
	$logger = $this->logger;

	if ( $pageUpdater instanceof WikiPage ) {
		wfDeprecated( __METHOD__ . ' with WikiPage instance', '1.42' );
		$pageUpdater = $pageUpdater->newPageUpdater( $user );
	}

	$page = $pageUpdater->getPage();
	$key = $this->getStashKey( $page, $this->getContentHash( $content ), $user );
	$fname = __METHOD__;

	// Use the primary DB to allow for fast blocking locks on the "save path" where this
	// value might actually be used to complete a page edit. If the edit submission request
	// happens before this edit stash request finishes, then the submission will block until
	// the stash request finishes parsing. For the lock acquisition below, there is not much
	// need to duplicate parsing of the same content/user/summary bundle, so try to avoid
	// blocking at all here.
	$dbw = $this->dbProvider->getPrimaryDatabase();
	if ( !$dbw->lock( $key, $fname, 0 ) ) {
		// De-duplicate requests on the same key
		return self::ERROR_BUSY;
	}
	// Never read again, but it must stay alive until this method returns: the callback
	// releases the lock when the variable goes out of scope, on every return path.
	$unlocker = new ScopedCallback( static function () use ( $dbw, $key, $fname ) {
		$dbw->unlock( $key, $fname );
	} );

	$cutoffTime = time() - self::PRESUME_FRESH_TTL_SEC;

	// Reuse any freshly built matching edit stash cache
	$editInfo = $this->getStashValue( $key );
	if ( $editInfo && (int)wfTimestamp( TS_UNIX, $editInfo->timestamp ) >= $cutoffTime ) {
		$alreadyCached = true;
	} else {
		$pageUpdater->setContent( SlotRecord::MAIN, $content );

		$update = $pageUpdater->prepareUpdate( EDIT_INTERNAL ); // applies pre-save transform
		$output = $update->getCanonicalParserOutput(); // causes content to be parsed
		$output->setCacheTime( $update->getRevision()->getTimestamp() );

		// emulate a cache value that kind of looks like a PreparedEdit, for use below
		$editInfo = (object)[
			'pstContent' => $update->getRawContent( SlotRecord::MAIN ),
			'output' => $output,
			'timestamp' => $output->getCacheTime()
		];

		$alreadyCached = false;
	}

	$logContext = [ 'cachekey' => $key, 'title' => (string)$page ];

	if ( !$editInfo->output ) {
		return self::ERROR_PARSE;
	}

	// Let extensions add ParserOutput metadata or warm other caches
	$legacyUser = $this->userFactory->newFromUserIdentity( $user );
	$legacyPage = $this->wikiPageFactory->newFromTitle( $page );
	$this->hookRunner->onParserOutputStashForEdit(
		$legacyPage, $content, $editInfo->output, $summary, $legacyUser );

	if ( $alreadyCached ) {
		$logger->debug( "Parser output for key '{cachekey}' already cached.", $logContext );

		return self::ERROR_NONE;
	}

	$code = $this->storeStashValue(
		$key,
		$editInfo->pstContent,
		$editInfo->output,
		$editInfo->timestamp,
		$user
	);

	if ( $code === true ) {
		$logger->debug( "Cached parser output for key '{cachekey}'.", $logContext );

		return self::ERROR_NONE;
	}

	if ( $code === 'uncacheable' ) {
		$logger->info(
			"Uncacheable parser output for key '{cachekey}' [{code}].",
			$logContext + [ 'code' => $code ]
		);

		// Report the real reason instead of falling through to ERROR_PARSE
		return self::ERROR_UNCACHEABLE;
	}

	$logger->error(
		"Failed to cache parser output for key '{cachekey}'.",
		$logContext + [ 'code' => $code ]
	);

	return self::ERROR_CACHE;
}
226
/**
 * Check that a prepared edit is in cache and still up-to-date.
 *
 * The stash is only consulted for user-initiated HTTP POST requests from non-bot accounts.
 * If no entry is found right away, this waits on any in-progress stash request for the
 * same key before giving up.
 *
 * A found entry is accepted when any of the following holds:
 *  - it is at most self::PRESUME_FRESH_TTL_SEC seconds old;
 *  - the user is not registered and their latest recent change predates the cache time;
 *  - the user is registered and their edit count equals the one recorded when stashing.
 *
 * @param PageIdentity $page
 * @param Content $content
 * @param UserIdentity $user
 * @return stdClass|false The stashed edit object (with pstContent, output, timestamp and
 *   edits fields), or false if there is no usable entry
 */
public function checkCache( PageIdentity $page, Content $content, UserIdentity $user ) {
	$legacyUser = $this->userFactory->newFromUserIdentity( $user );
	if (
		// The context is not an HTTP POST request
		!$legacyUser->getRequest()->wasPosted() ||
		// The context is a CLI script or a job runner HTTP POST request
		$this->initiator !== self::INITIATOR_USER ||
		// The editor account is a known bot
		$legacyUser->isBot()
	) {
		// Avoid wasted queries and statsd pollution
		return false;
	}

	$logger = $this->logger;

	$key = $this->getStashKey( $page, $this->getContentHash( $content ), $user );
	$logContext = [
		'key' => $key,
		'title' => (string)$page,
		'user' => $user->getName()
	];

	$editInfo = $this->getAndWaitForStashValue( $key );
	if ( !is_object( $editInfo ) || !$editInfo->output ) {
		$this->incrCacheReadStats( 'miss', 'no_stash', $content );
		if ( $this->recentStashEntryCount( $user ) > 0 ) {
			// The user has other stashed entries, so this key was most likely a mismatch
			$logger->info( "Empty cache for key '{key}' but not for user.", $logContext );
		} else {
			$logger->debug( "Empty cache for key '{key}'.", $logContext );
		}

		return false;
	}

	$age = time() - (int)wfTimestamp( TS_UNIX, $editInfo->output->getCacheTime() );
	$logContext['age'] = $age;

	$isCacheUsable = true;
	if ( $age <= self::PRESUME_FRESH_TTL_SEC ) {
		// Assume nothing changed in this time
		$this->incrCacheReadStats( 'hit', 'presumed_fresh', $content );
		$logger->debug( "Timestamp-based cache hit for key '{key}'.", $logContext );
	} elseif ( !$user->isRegistered() ) {
		$lastEdit = $this->lastEditTime( $user );
		$cacheTime = $editInfo->output->getCacheTime();
		if ( $lastEdit < $cacheTime ) {
			// Logged-out user made no local upload/template edits in the meantime
			$this->incrCacheReadStats( 'hit', 'presumed_fresh', $content );
			$logger->debug( "Edit check based cache hit for key '{key}'.", $logContext );
		} else {
			$isCacheUsable = false;
			$this->incrCacheReadStats( 'miss', 'proven_stale', $content );
			$logger->info( "Stale cache for key '{key}' due to outside edits.", $logContext );
		}
	} elseif ( $editInfo->edits === $this->userEditTracker->getUserEditCount( $user ) ) {
		// Logged-in user made no local upload/template edits in the meantime
		$this->incrCacheReadStats( 'hit', 'presumed_fresh', $content );
		$logger->debug( "Edit count based cache hit for key '{key}'.", $logContext );
	} else {
		$isCacheUsable = false;
		$this->incrCacheReadStats( 'miss', 'proven_stale', $content );
		$logger->info( "Stale cache for key '{key}' due to outside edits.", $logContext );
	}

	if ( !$isCacheUsable ) {
		return false;
	}

	if ( $editInfo->output->getOutputFlag( ParserOutputFlags::VARY_REVISION ) ) {
		// This can be used for the initial parse, e.g. for filters or doUserEditContent(),
		// but a second parse will be triggered in doEditUpdates() no matter what
		$logger->info(
			"Cache for key '{key}' has vary-revision; post-insertion parse inevitable.",
			$logContext
		);
	} else {
		static $flagsMaybeReparse = [
			// Similar to the above if we didn't guess the ID correctly
			ParserOutputFlags::VARY_REVISION_ID,
			// Similar to the above if we didn't guess the timestamp correctly
			ParserOutputFlags::VARY_REVISION_TIMESTAMP,
			// Similar to the above if we didn't guess the content correctly
			ParserOutputFlags::VARY_REVISION_SHA1,
			// Similar to the above if we didn't guess page ID correctly
			ParserOutputFlags::VARY_PAGE_ID,
		];
		foreach ( $flagsMaybeReparse as $flag ) {
			if ( $editInfo->output->getOutputFlag( $flag ) ) {
				$logger->debug(
					"Cache for key '{key}' has $flag; post-insertion parse possible.",
					$logContext
				);
			}
		}
	}

	return $editInfo;
}
348
/**
 * Count one stash lookup outcome, labelled by result, reason and content model.
 *
 * The counter is also copied to two statsd keys: a global one and a per-model one.
 *
 * @param string $result Either 'miss' or 'hit' (the keys of the lookup table below)
 * @param string $reason Short reason label, e.g. 'no_stash' or 'presumed_fresh'
 * @param Content $content Content whose model is used as a label
 */
private function incrCacheReadStats( $result, $reason, Content $content ) {
	static $statsdGroupByResult = [ 'miss' => 'cache_misses', 'hit' => 'cache_hits' ];

	$model = $content->getModel();
	$statsdSuffix = $statsdGroupByResult[ $result ] . '.' . $reason;
	$statsdKeys = [
		'editstash.' . $statsdSuffix,
		'editstash_by_model.' . $model . '.' . $statsdSuffix,
	];

	// Keep the fluent chain intact: each call acts on whatever the previous one returned
	$metric = $this->stats->getCounter( "editstash_cache_checks_total" )
		->setLabel( 'reason', $reason )
		->setLabel( 'result', $result )
		->setLabel( 'model', $model )
		->copyToStatsdAt( $statsdKeys );
	$metric->increment();
}
365
/**
 * Fetch the stash entry for a key, waiting on an in-flight stash request if needed.
 *
 * When the first lookup finds nothing, this tries to take the named lock for the key on
 * the primary DB (timeout argument 30) and looks again once the lock is obtained. The time
 * spent in that second phase is recorded as a timing metric.
 *
 * @param string $key Stash cache key
 * @return stdClass|false The stash entry, or false if none was found
 */
private function getAndWaitForStashValue( $key ) {
	$stashed = $this->getStashValue( $key );
	if ( $stashed ) {
		// Fast path: nothing to wait for
		return $stashed;
	}

	$waitStart = microtime( true );
	$fname = __METHOD__;

	// We ignore user aborts and keep parsing. Block on any prior parsing
	// so as to use its results and make use of the time spent parsing.
	$dbw = $this->dbProvider->getPrimaryDatabase();
	if ( $dbw->lock( $key, $fname, 30 ) ) {
		$stashed = $this->getStashValue( $key );
		$dbw->unlock( $key, $fname );
	}

	// NOTE(review): the observed value is in milliseconds although the metric name ends
	// in "_seconds" - confirm this is the unit the stats library expects.
	$elapsedMs = 1000 * max( 0, microtime( true ) - $waitStart );
	$this->stats->getTiming( 'editstash_lock_wait_seconds' )
		->copyToStatsdAt( 'editstash.lock_wait_time' )
		->observe( $elapsedMs );

	return $stashed;
}
391
/**
 * Look up edit text previously saved with stashInputText().
 *
 * @param string $textHash Hash identifying the text; used as part of the cache key
 * @return mixed Whatever the cache returns for that key
 */
public function fetchInputText( $textHash ) {
	return $this->cache->get(
		$this->cache->makeKey( 'stashedit', 'text', $textHash )
	);
}
401
/**
 * Save raw edit text in the cache under a key derived from its hash.
 *
 * The entry is written with a TTL of self::MAX_CACHE_TTL and the WRITE_ALLOW_SEGMENTS flag.
 *
 * @param string $text Text to store
 * @param string $textHash Hash identifying the text; used as part of the cache key
 * @return mixed Result of the cache write
 */
public function stashInputText( $text, $textHash ) {
	$cacheKey = $this->cache->makeKey( 'stashedit', 'text', $textHash );
	$stored = $this->cache->set(
		$cacheKey,
		$text,
		self::MAX_CACHE_TTL,
		BagOStuff::WRITE_ALLOW_SEGMENTS
	);

	return $stored;
}
417
/**
 * Find the timestamp of the most recent recentchanges row attributed to a user name.
 *
 * The query runs against a replica database.
 *
 * @param UserIdentity $user
 * @return string|null TS_MW formatted timestamp, or null if the query yields no value
 */
private function lastEditTime( UserIdentity $user ) {
	$dbr = $this->dbProvider->getReplicaDatabase();

	$latest = $dbr->newSelectQueryBuilder()
		->select( 'MAX(rc_timestamp)' )
		->from( 'recentchanges' )
		->join( 'actor', null, 'actor_id=rc_actor' )
		->where( [ 'actor_name' => $user->getName() ] )
		->caller( __METHOD__ )
		->fetchField();

	return wfTimestampOrNull( TS_MW, $latest );
}
433
/**
 * Compute a SHA-1 digest identifying the given content.
 *
 * The digest covers the content model, the default serialization format, and the content
 * serialized in that format, joined with newlines.
 *
 * @param Content $content
 * @return string Hexadecimal SHA-1 hash
 */
private function getContentHash( Content $content ) {
	$format = $content->getDefaultFormat();
	$parts = [
		$content->getModel(),
		$format,
		$content->serialize( $format ),
	];

	return sha1( implode( "\n", $parts ) );
}
447
/**
 * Build the cache key under which a stashed edit is stored.
 *
 * The key combines an MD5 of the page namespace and DB key, the content hash, and an MD5
 * of the user ID and name.
 *
 * @param PageIdentity $page
 * @param string $contentHash Hash accounting for the edit model/text
 * @param UserIdentity $user
 * @return string Cache key
 */
private function getStashKey( PageIdentity $page, $contentHash, UserIdentity $user ) {
	$pageDigest = md5( $page->getNamespace() . "\n" . $page->getDBkey() );
	// Account for user name related variables like signatures
	$userDigest = md5( $user->getId() . "\n" . $user->getName() );

	return $this->cache->makeKey( 'stashedit-info-v2', $pageDigest, $contentHash, $userDigest );
}
470
/**
 * Read a stash entry from the cache and decode it.
 *
 * @param string $key Stash cache key
 * @return stdClass|false Decoded stash entry, or false if absent or not decodable
 */
private function getStashValue( $key ) {
	return $this->unserializeStashInfo( $this->cache->get( $key ) );
}
480
/**
 * Write a stash entry to the cache with a bounded TTL.
 *
 * The TTL is the parser output's remaining cache expiry, capped at self::MAX_CACHE_TTL and,
 * when the output has the USER_SIGNATURE flag, at self::MAX_SIGNATURE_TTL. After a
 * successful write the user's list of recent stash keys is updated and pruned.
 *
 * @param string $key Stash cache key
 * @param Content $pstContent Content stored in the entry's pstContent field
 * @param ParserOutput $parserOutput Rendered output stored in the entry's output field
 * @param string $timestamp Timestamp stored in the entry's timestamp field
 * @param UserIdentity $user User whose current edit count is recorded in the entry
 * @return string|true True on success, 'uncacheable' if the TTL is not positive,
 *   'store_error' if serialization or the cache write failed
 */
private function storeStashValue(
	$key,
	Content $pstContent,
	ParserOutput $parserOutput,
	$timestamp,
	UserIdentity $user
) {
	// If an item is renewed, mind the cache TTL determined by config and parser functions.
	// Put an upper limit on the TTL to avoid extreme template/file staleness.
	$outputAge = time() - (int)wfTimestamp( TS_UNIX, $parserOutput->getCacheTime() );
	$remaining = $parserOutput->getCacheExpiry() - $outputAge;
	$ttl = min( $remaining, self::MAX_CACHE_TTL );

	// Avoid extremely stale user signature timestamps (T84843)
	if ( $parserOutput->getOutputFlag( ParserOutputFlags::USER_SIGNATURE ) ) {
		$ttl = min( $ttl, self::MAX_SIGNATURE_TTL );
	}

	if ( $ttl <= 0 ) {
		// low TTL due to a tag, magic word, or signature?
		return 'uncacheable';
	}

	// Bundle what the save path needs; the edit count lets checkCache() detect newer edits
	$entry = (object)[
		'pstContent' => $pstContent,
		'output' => $parserOutput,
		'timestamp' => $timestamp,
		'edits' => $this->userEditTracker->getUserEditCount( $user ),
	];

	$blob = $this->serializeStashInfo( $entry );
	if ( $blob === false ) {
		return 'store_error';
	}

	if ( !$this->cache->set( $key, $blob, $ttl, BagOStuff::WRITE_ALLOW_SEGMENTS ) ) {
		return 'store_error';
	}

	// These blobs can waste slots in low cardinality memcached slabs
	$this->pruneExcessStashedEntries( $user, $key );

	return true;
}
533
/**
 * Record a new stash key for the user and evict the oldest one if the list is full.
 *
 * The per-user list is itself cached under a key derived from the SHA-1 of the user name.
 * Once it already holds self::MAX_CACHE_RECENT keys, the first key is removed from the
 * list and its cache entry deleted before the new key is appended.
 *
 * @param UserIdentity $user
 * @param string $newKey Stash key that was just written
 */
private function pruneExcessStashedEntries( UserIdentity $user, $newKey ) {
	$listKey = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );

	$recentKeys = $this->cache->get( $listKey );
	if ( !$recentKeys ) {
		$recentKeys = [];
	}

	if ( count( $recentKeys ) >= self::MAX_CACHE_RECENT ) {
		$evictedKey = array_shift( $recentKeys );
		$this->cache->delete( $evictedKey, BagOStuff::WRITE_PRUNE_SEGMENTS );
	}

	$recentKeys[] = $newKey;
	$this->cache->set( $listKey, $recentKeys, 2 * self::MAX_CACHE_TTL );
}
550
/**
 * Count the stash keys currently recorded in the user's recent-key list.
 *
 * @param UserIdentity $user
 * @return int Number of keys in the list; 0 if the list is missing or empty
 */
private function recentStashEntryCount( UserIdentity $user ) {
	$listKey = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );

	$recentKeys = $this->cache->get( $listKey );
	if ( !$recentKeys ) {
		$recentKeys = [];
	}

	return count( $recentKeys );
}
560
/**
 * Encode a stash entry for storage using PHP's native serialization.
 *
 * @param stdClass $stashInfo Stash entry to encode
 * @return string Serialized form of the entry
 */
private function serializeStashInfo( stdClass $stashInfo ) {
	// @todo: use JSON with ParserOutput and Content
	$blob = serialize( $stashInfo );

	return $blob;
}
565
/**
 * Decode a cached stash entry, rejecting anything that is not a usable entry.
 *
 * @param mixed $serial Value read from the cache
 * @return stdClass|false The decoded entry if it is an object whose output field is a
 *   ParserOutput; false otherwise (including when $serial is not a string)
 */
private function unserializeStashInfo( $serial ) {
	if ( !is_string( $serial ) ) {
		return false;
	}

	// @todo: use JSON with ParserOutput and Content
	$decoded = unserialize( $serial );
	$isUsable = is_object( $decoded ) && $decoded->output instanceof ParserOutput;

	return $isUsable ? $decoded : false;
}
577}
const EDIT_INTERNAL
Definition Defines.php:134
wfTimestampOrNull( $outputtype=TS_UNIX, $ts=null)
Return a formatted timestamp, or null if input is null.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookContainer for dispatch.
Service for creating WikiPage objects.
ParserOutput is a rendering of a Content object or a message.
Value object representing a content slot associated with a page revision.
Manage the pre-emptive page parsing for edits to wiki pages.
checkCache(PageIdentity $page, Content $content, UserIdentity $user)
Check that a prepared edit is in cache and still up-to-date.
parseAndCache( $pageUpdater, Content $content, UserIdentity $user, string $summary)
__construct(BagOStuff $cache, IConnectionProvider $dbProvider, LoggerInterface $logger, StatsFactory $stats, UserEditTracker $userEditTracker, UserFactory $userFactory, WikiPageFactory $wikiPageFactory, HookContainer $hookContainer, $initiator)
Track info about user edit counts and timings.
Creates User objects.
Base representation for an editable wiki page.
Definition WikiPage.php:81
Class representing a cache/ephemeral data store.
Definition BagOStuff.php:88
StatsFactory Implementation.
Base interface for representing page content.
Definition Content.php:37
getModel()
Returns the ID of the content model used by this Content object.
serialize( $format=null)
Convenience method for serializing this Content object.
getDefaultFormat()
Convenience method that returns the default serialization format for the content model that this Content object uses.
Interface for objects (potentially) representing an editable wiki page.
This is a hook handler interface, see docs/Hooks.md.
Interface for objects representing user identity.
isRegistered()
This must be equivalent to getId() != 0 and is provided for code readability.
Provide primary and replica IDatabase connections.