MediaWiki  master
PageEditStash.php
Go to the documentation of this file.
1 <?php
23 namespace MediaWiki\Storage;
24 
25 use ActorMigration;
26 use BagOStuff;
27 use Content;
28 use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
32 use ParserOutput;
33 use Psr\Log\LoggerInterface;
34 use stdClass;
35 use Title;
36 use User;
38 use Wikimedia\ScopedCallback;
39 use WikiPage;
40 
/** @var BagOStuff Cache holding stashed edit info, stashed input text and per-user recent key lists */
48  private $cache;
/** @var \Wikimedia\Rdbms\ILoadBalancer Source of the database connections used for locks and queries */
50  private $lb;
/** @var LoggerInterface Logger used by parseAndCache() and checkCache() */
52  private $logger;
/** @var StatsdDataFactoryInterface Receives the "editstash.*" counters and timings */
54  private $stats;
/** @var \MediaWiki\Storage\Hook\ParserOutputStashForEditHook Runner used to fire onParserOutputStashForEdit() */
56  private $hookRunner;
/** @var int One of the INITIATOR_* constants */
58  private $initiator;
59 
// Status strings returned by parseAndCache().
// ERROR_NONE: the parser output was stashed, or a fresh matching entry already existed.
60  public const ERROR_NONE = 'stashed';
// ERROR_PARSE: no parser output was available to stash.
61  public const ERROR_PARSE = 'error_parse';
// ERROR_CACHE: writing the entry to the cache failed.
62  public const ERROR_CACHE = 'error_cache';
// ERROR_UNCACHEABLE: same string storeStashValue() reports when the computed TTL is not positive.
63  public const ERROR_UNCACHEABLE = 'uncacheable';
// ERROR_BUSY: the lock for the same stash key could not be acquired immediately.
64  public const ERROR_BUSY = 'busy';
65 
// Entries at most this many seconds old are reused or accepted without further checks.
66  public const PRESUME_FRESH_TTL_SEC = 30;
// Upper bound, in seconds, on the TTL of stash entries; also the TTL of stashed input text.
67  public const MAX_CACHE_TTL = 300; // 5 minutes
// Upper bound, in seconds, on the TTL of entries whose output has the 'user-signature' flag.
68  public const MAX_SIGNATURE_TTL = 60;
69 
// Once a user's recent key list reaches this size, the oldest stashed entry is deleted.
70  private const MAX_CACHE_RECENT = 2;
71 
// checkCache() only consults the stash when the initiator is INITIATOR_USER.
72  public const INITIATOR_USER = 1;
// NOTE(review): presumably marks job runner / CLI contexts, per the name - confirm against callers.
73  public const INITIATOR_JOB_OR_CLI = 2;
74 
/**
 * Class names from other namespaces are written fully qualified so that this
 * signature resolves correctly without relying on matching "use" statements.
 *
 * @param BagOStuff $cache Cache used for stashed edits and stashed input text
 * @param \Wikimedia\Rdbms\ILoadBalancer $lb Provides the connections used for locks and queries
 * @param LoggerInterface $logger
 * @param StatsdDataFactoryInterface $stats
 * @param \MediaWiki\HookContainer\HookContainer $hookContainer Wrapped in a HookRunner
 * @param int $initiator Class INITIATOR_* constant
 */
public function __construct(
	BagOStuff $cache,
	\Wikimedia\Rdbms\ILoadBalancer $lb,
	LoggerInterface $logger,
	StatsdDataFactoryInterface $stats,
	\MediaWiki\HookContainer\HookContainer $hookContainer,
	$initiator
) {
	$this->cache = $cache;
	$this->lb = $lb;
	$this->logger = $logger;
	$this->stats = $stats;
	$this->hookRunner = new \MediaWiki\HookContainer\HookRunner( $hookContainer );
	$this->initiator = $initiator;
}
98 
/**
 * Parse the content of a pending edit and stash the prepared result in the cache.
 *
 * A non-blocking database lock on the stash key de-duplicates concurrent requests
 * for the same title/content/user bundle.
 *
 * @param WikiPage $page
 * @param Content $content Edit content
 * @param User $user
 * @param string $summary Edit summary
 * @return string Class ERROR_* constant
 */
public function parseAndCache( WikiPage $page, Content $content, User $user, string $summary ) {
	$logger = $this->logger;

	$title = $page->getTitle();
	$key = $this->getStashKey( $title, $this->getContentHash( $content ), $user );
	$fname = __METHOD__;

	// Use the master DB to allow for fast blocking locks on the "save path" where this
	// value might actually be used to complete a page edit. If the edit submission request
	// happens before this edit stash request finishes, then the submission will block until
	// the stash request finishes parsing. For the lock acquisition below, there is not much
	// need to duplicate parsing of the same content/user/summary bundle, so try to avoid
	// blocking at all here.
	$dbw = $this->lb->getConnectionRef( DB_MASTER );
	if ( !$dbw->lock( $key, $fname, 0 ) ) {
		// De-duplicate requests on the same key
		return self::ERROR_BUSY;
	}
	// Intentionally "unused": this object only exists to carry the unlock callback
	/** @noinspection PhpUnusedLocalVariableInspection */
	$unlocker = new ScopedCallback( function () use ( $dbw, $key, $fname ) {
		$dbw->unlock( $key, $fname );
	} );

	$cutoffTime = time() - self::PRESUME_FRESH_TTL_SEC;

	// Reuse any freshly built matching edit stash cache
	$editInfo = $this->getStashValue( $key );
	if ( $editInfo && wfTimestamp( TS_UNIX, $editInfo->timestamp ) >= $cutoffTime ) {
		$alreadyCached = true;
	} else {
		$format = $content->getDefaultFormat();
		$editInfo = $page->prepareContentForEdit( $content, null, $user, $format, false );
		// Only touch the output if there is one; a missing output is reported
		// as ERROR_PARSE below instead of failing on a null method call
		if ( $editInfo && $editInfo->output ) {
			$editInfo->output->setCacheTime( $editInfo->timestamp );
		}
		$alreadyCached = false;
	}

	$context = [ 'cachekey' => $key, 'title' => $title->getPrefixedText() ];

	if ( $editInfo && $editInfo->output ) {
		// Let extensions add ParserOutput metadata or warm other caches
		$this->hookRunner->onParserOutputStashForEdit(
			$page, $content, $editInfo->output, $summary, $user );

		if ( $alreadyCached ) {
			$logger->debug( "Parser output for key '{cachekey}' already cached.", $context );

			return self::ERROR_NONE;
		}

		$code = $this->storeStashValue(
			$key,
			$editInfo->pstContent,
			$editInfo->output,
			$editInfo->timestamp,
			$user
		);

		if ( $code === true ) {
			$logger->debug( "Cached parser output for key '{cachekey}'.", $context );

			return self::ERROR_NONE;
		} elseif ( $code === 'uncacheable' ) {
			$logger->info(
				"Uncacheable parser output for key '{cachekey}' [{code}].",
				$context + [ 'code' => $code ]
			);

			// Report the dedicated status rather than falling through to ERROR_PARSE
			return self::ERROR_UNCACHEABLE;
		} else {
			$logger->error(
				"Failed to cache parser output for key '{cachekey}'.",
				$context + [ 'code' => $code ]
			);

			return self::ERROR_CACHE;
		}
	}

	return self::ERROR_PARSE;
}
186 
/**
 * Check that a prepared edit is in cache and still up-to-date.
 *
 * If nothing is stashed yet, this goes through getAndWaitForStashValue(), which may
 * block on the stash key's lock before looking in the cache a second time.
 *
 * A stashed entry is accepted when any of the following holds:
 *   - it is at most PRESUME_FRESH_TTL_SEC seconds old;
 *   - the user is anonymous and their latest recent change predates the parse;
 *   - the user is logged in and their edit count equals the count stored in the entry.
 *
 * The returned object is the one written by storeStashValue(), with the fields:
 *   - pstContent: the Content that was passed to storeStashValue()
 *   - output: the ParserOutput instance
 *   - timestamp: the timestamp stored with the entry
 *   - edits: the user's edit count at the time the entry was stored
 *
 * @param Title $title
 * @param Content $content
 * @param User $user User to get the stashed edit for
 * @return stdClass|bool The stashed edit info, or false on a cache miss or stale entry
 */
public function checkCache( Title $title, Content $content, User $user ) {
	if (
		// The context is not an HTTP POST request
		!$user->getRequest()->wasPosted() ||
		// The context is a CLI script or a job runner HTTP POST request
		$this->initiator !== self::INITIATOR_USER ||
		// The editor account is a known bot
		$user->isBot()
	) {
		// Avoid wasted queries and statsd pollution
		return false;
	}

	$logger = $this->logger;

	$key = $this->getStashKey( $title, $this->getContentHash( $content ), $user );
	$context = [
		'key' => $key,
		'title' => $title->getPrefixedText(),
		'user' => $user->getName()
	];

	$editInfo = $this->getAndWaitForStashValue( $key );
	if ( !is_object( $editInfo ) || !$editInfo->output ) {
		$this->incrStatsByContent( 'cache_misses.no_stash', $content );
		if ( $this->recentStashEntryCount( $user ) > 0 ) {
			$logger->info( "Empty cache for key '{key}' but not for user.", $context );
		} else {
			$logger->debug( "Empty cache for key '{key}'.", $context );
		}

		return false;
	}

	$age = time() - (int)wfTimestamp( TS_UNIX, $editInfo->output->getCacheTime() );
	$context['age'] = $age;

	$isCacheUsable = true;
	if ( $age <= self::PRESUME_FRESH_TTL_SEC ) {
		// Assume nothing changed in this time
		$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
		$logger->debug( "Timestamp-based cache hit for key '{key}'.", $context );
	} elseif ( $user->isAnon() ) {
		$lastEdit = $this->lastEditTime( $user );
		$cacheTime = $editInfo->output->getCacheTime();
		if ( $lastEdit < $cacheTime ) {
			// Logged-out user made no local upload/template edits in the meantime
			$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
			$logger->debug( "Edit check based cache hit for key '{key}'.", $context );
		} else {
			$isCacheUsable = false;
			$this->incrStatsByContent( 'cache_misses.proven_stale', $content );
			$logger->info( "Stale cache for key '{key}' due to outside edits.", $context );
		}
	} else {
		if ( $editInfo->edits === $user->getEditCount() ) {
			// Logged-in user made no local upload/template edits in the meantime
			$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
			$logger->debug( "Edit count based cache hit for key '{key}'.", $context );
		} else {
			$isCacheUsable = false;
			$this->incrStatsByContent( 'cache_misses.proven_stale', $content );
			$logger->info( "Stale cache for key '{key}' due to outside edits.", $context );
		}
	}

	if ( !$isCacheUsable ) {
		return false;
	}

	if ( $editInfo->output->getFlag( 'vary-revision' ) ) {
		// This can be used for the initial parse, e.g. for filters or doEditContent(),
		// but a second parse will be triggered in doEditUpdates() no matter what
		$logger->info(
			"Cache for key '{key}' has vary-revision; post-insertion parse inevitable.",
			$context
		);
	} else {
		static $flagsMaybeReparse = [
			// Similar to the above if we didn't guess the ID correctly
			'vary-revision-id',
			// Similar to the above if we didn't guess the timestamp correctly
			'vary-revision-timestamp',
			// Similar to the above if we didn't guess the content correctly
			'vary-revision-sha1',
			// Similar to the above if we didn't guess page ID correctly
			'vary-page-id'
		];
		foreach ( $flagsMaybeReparse as $flag ) {
			if ( $editInfo->output->getFlag( $flag ) ) {
				$logger->debug(
					"Cache for key '{key}' has $flag; post-insertion parse possible.",
					$context
				);
			}
		}
	}

	return $editInfo;
}
307 
/**
 * Bump an edit stash counter twice: once overall and once per content model.
 *
 * @param string $subkey Counter name suffix
 * @param Content $content Its model name qualifies the per-model counter
 */
private function incrStatsByContent( $subkey, Content $content ) {
	// Overall counter, kept for b/c
	$this->stats->increment( "editstash.$subkey" );

	$perModelKey = 'editstash_by_model.' . $content->getModel() . ".$subkey";
	$this->stats->increment( $perModelKey );
}
316 
/**
 * Fetch a stash entry, waiting on the key's lock if it is not in the cache yet.
 *
 * The time spent on the lock attempt and second lookup is reported as the
 * 'editstash.lock_wait_time' timing, in milliseconds.
 *
 * @param string $key Stash key
 * @return stdClass|bool The stashed object, or false if none could be found
 */
private function getAndWaitForStashValue( $key ) {
	$stashed = $this->getStashValue( $key );
	if ( $stashed ) {
		// Already available; no need to wait or record a timing
		return $stashed;
	}

	$waitStart = microtime( true );
	// We ignore user aborts and keep parsing. Block on any prior parsing
	// so as to use its results and make use of the time spent parsing.
	// Skip this logic if there is no master connection in case this method
	// is called on an HTTP GET request for some reason.
	$writerIndex = $this->lb->getWriterIndex();
	$dbw = $this->lb->getAnyOpenConnection( $writerIndex );
	if ( $dbw && $dbw->lock( $key, __METHOD__, 30 ) ) {
		$stashed = $this->getStashValue( $key );
		$dbw->unlock( $key, __METHOD__ );
	}

	$elapsedMs = 1000 * max( 0, microtime( true ) - $waitStart );
	$this->stats->timing( 'editstash.lock_wait_time', $elapsedMs );

	return $stashed;
}
342 
/**
 * Look up input text previously stored under the given hash.
 *
 * @param string $textHash Hash that identifies the stashed text
 * @return mixed Whatever the cache returns for the 'stashedit'/'text' key of this hash
 */
public function fetchInputText( $textHash ) {
	return $this->cache->get(
		$this->cache->makeKey( 'stashedit', 'text', $textHash )
	);
}
352 
/**
 * Stash the input text of an edit under its hash for up to MAX_CACHE_TTL seconds.
 *
 * @param string $text Text to stash
 * @param string $textHash Hash identifying the text; fetchInputText() looks it up by the same hash
 * @return mixed Return value of the cache write (used as a success flag elsewhere in this class)
 */
public function stashInputText( $text, $textHash ) {
	$textKey = $this->cache->makeKey( 'stashedit', 'text', $textHash );

	return $this->cache->set(
		$textKey,
		$text,
		self::MAX_CACHE_TTL,
		// Same write flag storeStashValue() passes for its (potentially large) values
		BagOStuff::WRITE_ALLOW_SEGMENTS
	);
}
368 
/**
 * Get the newest recent changes timestamp attributed to a user.
 *
 * @param User $user
 * @return string|null TS_MW timestamp, or null as produced by wfTimestampOrNull()
 */
private function lastEditTime( User $user ) {
	$dbr = $this->lb->getConnectionRef( DB_REPLICA );
	$actorQuery = ActorMigration::newMigration()->getWhere( $dbr, 'rc_user', $user, false );

	$tables = [ 'recentchanges' ] + $actorQuery['tables'];
	$conds = [ $actorQuery['conds'] ];
	$latest = $dbr->selectField(
		$tables,
		'MAX(rc_timestamp)',
		$conds,
		__METHOD__,
		[],
		$actorQuery['joins']
	);

	return wfTimestampOrNull( TS_MW, $latest );
}
388 
/**
 * Get hash of the content, factoring in model/format.
 *
 * @param Content $content
 * @return string SHA-1 hex digest of the model, default format and serialized text
 */
private function getContentHash( Content $content ) {
	$parts = [];
	$parts[] = $content->getModel();
	$parts[] = $content->getDefaultFormat();
	$parts[] = $content->serialize( $content->getDefaultFormat() );

	return sha1( implode( "\n", $parts ) );
}
402 
/**
 * Get the temporary prepared edit stash key for a user.
 *
 * The key combines hashes of the page title, the content, and the user's ID and name.
 *
 * @param Title $title
 * @param string $contentHash Result of getContentHash()
 * @param User $user User to get the key for
 * @return string Cache key
 */
private function getStashKey( Title $title, $contentHash, User $user ) {
	$titleHash = md5( $title->getPrefixedDBkey() );
	// Account for user name related variables like signatures
	$userHash = md5( $user->getId() . "\n" . $user->getName() );

	// $contentHash accounts for the edit model/text
	return $this->cache->makeKey( 'stashedit-info-v1', $titleHash, $contentHash, $userHash );
}
425 
/**
 * Read a stash entry from the cache, accepting it only if it is well-formed.
 *
 * @param string $key Stash key
 * @return stdClass|bool The cached object if its 'output' is a ParserOutput, else false
 */
private function getStashValue( $key ) {
	$cached = $this->cache->get( $key );
	$isUsable = is_object( $cached ) && $cached->output instanceof ParserOutput;

	return $isUsable ? $cached : false;
}
438 
/**
 * Build a value to store in the cache based on the PST content and parser output.
 *
 * The entry's TTL is the parser output's remaining cache lifetime, capped by
 * MAX_CACHE_TTL and, for output flagged 'user-signature', by MAX_SIGNATURE_TTL.
 *
 * @param string $key Stash key
 * @param Content $pstContent Stored as the entry's 'pstContent'
 * @param ParserOutput $parserOutput Stored as the entry's 'output'
 * @param string $timestamp Stored as the entry's 'timestamp'
 * @param User $user Its edit count is stored as the entry's 'edits'
 * @return string|bool True on success; 'uncacheable' or 'store_error' on failure
 */
private function storeStashValue(
	$key,
	Content $pstContent,
	ParserOutput $parserOutput,
	$timestamp,
	User $user
) {
	// If an item is renewed, mind the cache TTL determined by config and parser functions.
	// Put an upper limit on the TTL for sanity to avoid extreme template/file staleness.
	$elapsed = time() - (int)wfTimestamp( TS_UNIX, $parserOutput->getCacheTime() );
	$ttlCaps = [ $parserOutput->getCacheExpiry() - $elapsed, self::MAX_CACHE_TTL ];
	if ( $parserOutput->getFlag( 'user-signature' ) ) {
		// Avoid extremely stale user signature timestamps (T84843)
		$ttlCaps[] = self::MAX_SIGNATURE_TTL;
	}
	$ttl = min( $ttlCaps );

	if ( $ttl <= 0 ) {
		// Low TTL due to a tag, magic word, or signature?
		return 'uncacheable';
	}

	// Store what is actually needed and split the output into another key (T204742)
	$fields = [
		'pstContent' => $pstContent,
		'output' => $parserOutput,
		'timestamp' => $timestamp,
		'edits' => $user->getEditCount()
	];
	$stashInfo = (object)$fields;

	if ( !$this->cache->set( $key, $stashInfo, $ttl, BagOStuff::WRITE_ALLOW_SEGMENTS ) ) {
		return 'store_error';
	}

	// These blobs can waste slots in low cardinality memcached slabs
	$this->pruneExcessStashedEntries( $user, $key );

	return true;
}
487 
/**
 * Record a new stash key for the user and delete the oldest tracked entry
 * once the user's list has reached MAX_CACHE_RECENT keys.
 *
 * @param User $user
 * @param string $newKey Stash key that was just written
 */
private function pruneExcessStashedEntries( User $user, $newKey ) {
	$listKey = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );

	$recentKeys = $this->cache->get( $listKey ) ?: [];
	if ( count( $recentKeys ) >= self::MAX_CACHE_RECENT ) {
		// Drop the oldest key from the list and remove its entry from the cache
		$this->cache->delete( array_shift( $recentKeys ), BagOStuff::WRITE_PRUNE_SEGMENTS );
	}

	$recentKeys[] = $newKey;
	$this->cache->set( $listKey, $recentKeys, 2 * self::MAX_CACHE_TTL );
}
504 
/**
 * Count the stash keys currently tracked for a user.
 *
 * @param User $user
 * @return int Number of keys in the user's recent list; 0 if there is no list
 */
private function recentStashEntryCount( User $user ) {
	$listKey = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );
	$recentKeys = $this->cache->get( $listKey );

	return $recentKeys ? count( $recentKeys ) : 0;
}
514 }
MediaWiki\Storage\PageEditStash\stashInputText
stashInputText( $text, $textHash)
Definition: PageEditStash.php:358
MediaWiki\Storage\PageEditStash\fetchInputText
fetchInputText( $textHash)
Definition: PageEditStash.php:347
CacheTime\getCacheExpiry
getCacheExpiry()
Returns the number of seconds after which this object should expire.
Definition: CacheTime.php:129
MediaWiki\Storage\PageEditStash\lastEditTime
lastEditTime(User $user)
Definition: PageEditStash.php:373
ParserOutput
Definition: ParserOutput.php:25
User\getId
getId()
Get the user's ID.
Definition: User.php:2025
User\isAnon
isAnon()
Get whether the user is anonymous.
Definition: User.php:3022
MediaWiki\Storage\PageEditStash\ERROR_PARSE
const ERROR_PARSE
Definition: PageEditStash.php:61
User\isBot
isBot()
Definition: User.php:3030
User\getEditCount
getEditCount()
Get the user's edit count.
Definition: User.php:2950
true
return true
Definition: router.php:90
MediaWiki\Storage\PageEditStash\getContentHash
getContentHash(Content $content)
Get hash of the content, factoring in model/format.
Definition: PageEditStash.php:395
wfTimestamp
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Definition: GlobalFunctions.php:1808
MediaWiki\Storage\PageEditStash\pruneExcessStashedEntries
pruneExcessStashedEntries(User $user, $newKey)
Definition: PageEditStash.php:492
BagOStuff\WRITE_ALLOW_SEGMENTS
const WRITE_ALLOW_SEGMENTS
Definition: BagOStuff.php:92
MediaWiki\Storage\PageEditStash\incrStatsByContent
incrStatsByContent( $subkey, Content $content)
Definition: PageEditStash.php:312
WikiPage
Class representing a MediaWiki article and history.
Definition: WikiPage.php:52
MediaWiki\Storage\PageEditStash\getStashKey
getStashKey(Title $title, $contentHash, User $user)
Get the temporary prepared edit stash key for a user.
Definition: PageEditStash.php:415
MediaWiki\Storage\PageEditStash\ERROR_CACHE
const ERROR_CACHE
Definition: PageEditStash.php:62
BagOStuff
Class representing a cache/ephemeral data store.
Definition: BagOStuff.php:71
ActorMigration
This class handles the logic for the actor table migration and should always be used in lieu of direc...
Definition: ActorMigration.php:39
MediaWiki\Storage\PageEditStash\$logger
LoggerInterface $logger
Definition: PageEditStash.php:52
ActorMigration\newMigration
static newMigration()
Static constructor.
Definition: ActorMigration.php:140
User\getRequest
getRequest()
Get the WebRequest object to use with this object.
Definition: User.php:3140
MediaWiki\Storage\PageEditStash\__construct
__construct(BagOStuff $cache, ILoadBalancer $lb, LoggerInterface $logger, StatsdDataFactoryInterface $stats, HookContainer $hookContainer, $initiator)
Definition: PageEditStash.php:83
MediaWiki\Storage\PageEditStash\$hookRunner
ParserOutputStashForEditHook $hookRunner
Definition: PageEditStash.php:56
MediaWiki\Storage\PageEditStash\PRESUME_FRESH_TTL_SEC
const PRESUME_FRESH_TTL_SEC
Definition: PageEditStash.php:66
MediaWiki\Storage\Hook\ParserOutputStashForEditHook
Stable to implement.
Definition: ParserOutputStashForEditHook.php:14
MediaWiki\Storage\PageEditStash
Class for managing stashed edits used by the page updater classes.
Definition: PageEditStash.php:46
ParserOutput\getFlag
getFlag( $flag)
Definition: ParserOutput.php:1053
WikiPage\getTitle
getTitle()
Get the title object of the article.
Definition: WikiPage.php:281
wfTimestampOrNull
wfTimestampOrNull( $outputtype=TS_UNIX, $ts=null)
Return a formatted timestamp, or null if input is null.
Definition: GlobalFunctions.php:1824
$title
$title
Definition: testCompression.php:38
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
MediaWiki\Storage\PageEditStash\$initiator
int $initiator
Definition: PageEditStash.php:58
BagOStuff\WRITE_PRUNE_SEGMENTS
const WRITE_PRUNE_SEGMENTS
Definition: BagOStuff.php:93
DB_MASTER
const DB_MASTER
Definition: defines.php:26
MediaWiki\Storage\PageEditStash\recentStashEntryCount
recentStashEntryCount(User $user)
Definition: PageEditStash.php:509
MediaWiki\Storage\PageEditStash\getAndWaitForStashValue
getAndWaitForStashValue( $key)
Definition: PageEditStash.php:321
MediaWiki\Storage\PageEditStash\storeStashValue
storeStashValue( $key, Content $pstContent, ParserOutput $parserOutput, $timestamp, User $user)
Build a value to store in memcached based on the PST content and parser output.
Definition: PageEditStash.php:451
$content
$content
Definition: router.php:76
MediaWiki\Storage\PageEditStash\$stats
StatsdDataFactoryInterface $stats
Definition: PageEditStash.php:54
MediaWiki\Storage\PageEditStash\getStashValue
getStashValue( $key)
Definition: PageEditStash.php:430
MediaWiki\Storage
Definition: BlobAccessException.php:23
Content
Base interface for content objects.
Definition: Content.php:35
Title
Represents a title within MediaWiki.
Definition: Title.php:42
MediaWiki\Storage\PageEditStash\INITIATOR_USER
const INITIATOR_USER
Definition: PageEditStash.php:72
MediaWiki\Storage\PageEditStash\ERROR_BUSY
const ERROR_BUSY
Definition: PageEditStash.php:64
MediaWiki\Storage\PageEditStash\ERROR_UNCACHEABLE
const ERROR_UNCACHEABLE
Definition: PageEditStash.php:63
WikiPage\prepareContentForEdit
prepareContentForEdit(Content $content, $revision=null, User $user=null, $serialFormat=null, $useCache=true)
Prepare content which is about to be saved.
Definition: WikiPage.php:2047
MediaWiki\Storage\PageEditStash\parseAndCache
parseAndCache(WikiPage $page, Content $content, User $user, string $summary)
Definition: PageEditStash.php:106
MediaWiki\HookContainer\HookContainer
HookContainer class.
Definition: HookContainer.php:44
MediaWiki\Storage\PageEditStash\$lb
ILoadBalancer $lb
Definition: PageEditStash.php:50
MediaWiki\HookContainer\HookRunner
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Definition: HookRunner.php:570
MediaWiki\Storage\PageEditStash\MAX_SIGNATURE_TTL
const MAX_SIGNATURE_TTL
Definition: PageEditStash.php:68
MediaWiki\$context
IContextSource $context
Definition: MediaWiki.php:40
MediaWiki\Storage\PageEditStash\checkCache
checkCache(Title $title, Content $content, User $user)
Check that a prepared edit is in cache and still up-to-date.
Definition: PageEditStash.php:207
CacheTime\getCacheTime
getCacheTime()
Definition: CacheTime.php:60
User
The User object encapsulates all of the user-specific settings (user_id, name, rights,...
Definition: User.php:56
MediaWiki\Storage\PageEditStash\MAX_CACHE_RECENT
const MAX_CACHE_RECENT
Definition: PageEditStash.php:70
MediaWiki\Storage\PageEditStash\INITIATOR_JOB_OR_CLI
const INITIATOR_JOB_OR_CLI
Definition: PageEditStash.php:73
MediaWiki\Storage\PageEditStash\MAX_CACHE_TTL
const MAX_CACHE_TTL
Definition: PageEditStash.php:67
User\getName
getName()
Get the user name, or the IP of an anonymous user.
Definition: User.php:2054
Wikimedia\Rdbms\ILoadBalancer
Database cluster connection, tracking, load balancing, and transaction manager interface.
Definition: ILoadBalancer.php:81
MediaWiki\Storage\PageEditStash\$cache
BagOStuff $cache
Definition: PageEditStash.php:48
MediaWiki\Storage\PageEditStash\ERROR_NONE
const ERROR_NONE
Definition: PageEditStash.php:60