MediaWiki  master
PageEditStash.php
Go to the documentation of this file.
1 <?php
21 namespace MediaWiki\Storage;
22 
23 use BagOStuff;
24 use Content;
25 use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
36 use ParserOutput;
37 use Psr\Log\LoggerInterface;
38 use stdClass;
40 use Wikimedia\ScopedCallback;
41 use WikiPage;
42 
/** @var BagOStuff Cache store that holds stashed edit info, stashed input text and recent-key lists */
55  private $cache;
/** @var ILoadBalancer Provides the database connections used for locking and for recent-change lookups */
57  private $lb;
/** @var LoggerInterface */
59  private $logger;
/** @var StatsdDataFactoryInterface Receives the "editstash.*" counters and timings */
61  private $stats;
/** @var HookRunner Built in the constructor from the injected HookContainer */
63  private $hookRunner;
/** @var UserEditTracker Supplies the user edit counts stored with, and compared against, stash entries */
65  private $userEditTracker;
/** @var UserFactory */
67  private $userFactory;
/** @var WikiPageFactory */
69  private $wikiPageFactory;
/** @var int Compared against self::INITIATOR_USER in checkCache(); presumably one of the INITIATOR_* constants */
71  private $initiator;
72 
// Result codes of parseAndCache()
// Returned when the output was stored, or when a fresh matching entry already existed
73  public const ERROR_NONE = 'stashed';
// Returned when no parser output is available for the edit
74  public const ERROR_PARSE = 'error_parse';
// Returned when writing the entry to the cache failed
75  public const ERROR_CACHE = 'error_cache';
// Counterpart of the 'uncacheable' code that storeStashValue() yields for a TTL of zero or less
76  public const ERROR_UNCACHEABLE = 'uncacheable';
// Returned when the per-key lock is already held, i.e. the same stash request is in progress
77  public const ERROR_BUSY = 'busy';
78 
// Entries no older than this many seconds are reused/accepted without further staleness checks
79  public const PRESUME_FRESH_TTL_SEC = 30;
// Upper bound on the TTL of stash entries and stashed input text
80  public const MAX_CACHE_TTL = 300; // 5 minutes
// Tighter TTL bound applied when the parser output carries the USER_SIGNATURE flag
81  public const MAX_SIGNATURE_TTL = 60;
82 
// Once a user's recent-key list holds this many keys, the oldest stash entry is deleted on the next store
83  private const MAX_CACHE_RECENT = 2;
84 
// Values for the constructor's $initiator argument; checkCache() only serves INITIATOR_USER
85  public const INITIATOR_USER = 1;
86  public const INITIATOR_JOB_OR_CLI = 2;
87 
/**
 * Wire up the collaborators this service depends on.
 *
 * @param BagOStuff $cache Store for stashed edit info and input text
 * @param ILoadBalancer $lb Source of database connections
 * @param LoggerInterface $logger
 * @param StatsdDataFactoryInterface $stats
 * @param UserEditTracker $userEditTracker
 * @param UserFactory $userFactory
 * @param WikiPageFactory $wikiPageFactory
 * @param HookContainer $hookContainer Wrapped in a HookRunner for hook dispatch
 * @param int $initiator Presumably one of the self::INITIATOR_* constants
 */
public function __construct(
	BagOStuff $cache,
	ILoadBalancer $lb,
	LoggerInterface $logger,
	StatsdDataFactoryInterface $stats,
	UserEditTracker $userEditTracker,
	UserFactory $userFactory,
	WikiPageFactory $wikiPageFactory,
	HookContainer $hookContainer,
	$initiator
) {
	// Who is driving this request (checked by checkCache())
	$this->initiator = $initiator;

	// Storage and database access
	$this->cache = $cache;
	$this->lb = $lb;

	// Diagnostics
	$this->logger = $logger;
	$this->stats = $stats;

	// User and page helpers
	$this->userEditTracker = $userEditTracker;
	$this->userFactory = $userFactory;
	$this->wikiPageFactory = $wikiPageFactory;

	// Hooks are always dispatched through a runner, never the raw container
	$this->hookRunner = new HookRunner( $hookContainer );
}
120 
/**
 * Parse the given content ahead of the actual save and stash the result in the cache.
 *
 * If a matching stash entry newer than PRESUME_FRESH_TTL_SEC already exists it is
 * reused instead of parsing again. Concurrent requests for the same key are
 * de-duplicated with a non-blocking lock on the primary database.
 *
 * @param PageUpdater|WikiPage $pageUpdater A WikiPage is accepted for backwards
 *   compatibility and converted to a page updater for $user
 * @param Content $content Edit content to parse
 * @param UserIdentity $user User the edit is attributed to
 * @param string $summary Edit summary, passed on to the ParserOutputStashForEdit hook
 * @return string One of the self::ERROR_* constants (ERROR_NONE on success)
 */
public function parseAndCache( $pageUpdater, Content $content, UserIdentity $user, string $summary ) {
	$logger = $this->logger;

	if ( $pageUpdater instanceof WikiPage ) {
		// TODO: Trigger deprecation warning once extensions have been fixed.
		// Or better, create PageUpdater::prepareAndStash and deprecate this method.
		$pageUpdater = $pageUpdater->newPageUpdater( $user );
	}

	$page = $pageUpdater->getPage();
	$key = $this->getStashKey( $page, $this->getContentHash( $content ), $user );
	$fname = __METHOD__;

	// Use the primary DB to allow for fast blocking locks on the "save path" where this
	// value might actually be used to complete a page edit. If the edit submission request
	// happens before this edit stash requests finishes, then the submission will block until
	// the stash request finishes parsing. For the lock acquisition below, there is not much
	// need to duplicate parsing of the same content/user/summary bundle, so try to avoid
	// blocking at all here.
	$dbw = $this->lb->getConnectionRef( DB_PRIMARY );
	if ( !$dbw->lock( $key, $fname, 0 ) ) {
		// De-duplicate requests on the same key
		return self::ERROR_BUSY;
	}
	// $unlocker is never read. It is held only so that the scoped callback, which
	// releases the lock, stays alive until this method exits (on any return path).
	$unlocker = new ScopedCallback( static function () use ( $dbw, $key, $fname ) {
		$dbw->unlock( $key, $fname );
	} );

	$cutoffTime = time() - self::PRESUME_FRESH_TTL_SEC;

	// Reuse any freshly built matching edit stash cache
	$editInfo = $this->getStashValue( $key );
	if ( $editInfo && (int)wfTimestamp( TS_UNIX, $editInfo->timestamp ) >= $cutoffTime ) {
		$alreadyCached = true;
	} else {
		$pageUpdater->setContent( SlotRecord::MAIN, $content );

		$update = $pageUpdater->prepareUpdate( EDIT_INTERNAL ); // applies pre-save transform
		$output = $update->getCanonicalParserOutput(); // causes content to be parsed
		$output->setCacheTime( $update->getRevision()->getTimestamp() );

		// emulate a cache value that kind of looks like a PreparedEdit, for use below
		$editInfo = (object)[
			'pstContent' => $update->getRawContent( SlotRecord::MAIN ),
			'output' => $output,
			'timestamp' => $output->getCacheTime()
		];

		$alreadyCached = false;
	}

	$logContext = [ 'cachekey' => $key, 'title' => (string)$page ];

	if ( $editInfo->output ) {
		// Let extensions add ParserOutput metadata or warm other caches
		$legacyUser = $this->userFactory->newFromUserIdentity( $user );
		$legacyPage = $this->wikiPageFactory->newFromTitle( $page );
		$this->hookRunner->onParserOutputStashForEdit(
			$legacyPage, $content, $editInfo->output, $summary, $legacyUser );

		if ( $alreadyCached ) {
			$logger->debug( "Parser output for key '{cachekey}' already cached.", $logContext );

			return self::ERROR_NONE;
		}

		$code = $this->storeStashValue(
			$key,
			$editInfo->pstContent,
			$editInfo->output,
			$editInfo->timestamp,
			$user
		);

		if ( $code === true ) {
			$logger->debug( "Cached parser output for key '{cachekey}'.", $logContext );

			return self::ERROR_NONE;
		} elseif ( $code === 'uncacheable' ) {
			$logger->info(
				"Uncacheable parser output for key '{cachekey}' [{code}].",
				$logContext + [ 'code' => $code ]
			);

			// Report the real reason instead of falling through to ERROR_PARSE:
			// the parse succeeded, the output just may not be cached.
			return self::ERROR_UNCACHEABLE;
		} else {
			$logger->error(
				"Failed to cache parser output for key '{cachekey}'.",
				$logContext + [ 'code' => $code ]
			);

			return self::ERROR_CACHE;
		}
	}

	// No parser output could be obtained for this content
	return self::ERROR_PARSE;
}
226 
/**
 * Check that a prepared edit is in the cache and still up-to-date.
 *
 * The lookup is skipped (returning false) unless the request was POSTed, the
 * initiator is INITIATOR_USER and the user is not a bot. A found entry is accepted
 * when any of the following holds:
 *  - its parser output is at most PRESUME_FRESH_TTL_SEC seconds old;
 *  - the user is not registered and has no recent change at or after the output's
 *    cache time;
 *  - the user is registered and their edit count equals the count stored with the entry.
 *
 * @param PageIdentity $page
 * @param Content $content Content of the edit being saved
 * @param UserIdentity $user User performing the edit
 * @return object|false The stashed edit info as written by storeStashValue() (fields
 *   pstContent, output, timestamp, edits), or false if absent, stale or not applicable
 */
public function checkCache( PageIdentity $page, Content $content, UserIdentity $user ) {
	$legacyUser = $this->userFactory->newFromUserIdentity( $user );
	if (
		// The context is not an HTTP POST request
		!$legacyUser->getRequest()->wasPosted() ||
		// The context is a CLI script or a job runner HTTP POST request
		$this->initiator !== self::INITIATOR_USER ||
		// The editor account is a known bot
		$legacyUser->isBot()
	) {
		// Avoid wasted queries and statsd pollution
		return false;
	}

	$logger = $this->logger;

	$key = $this->getStashKey( $page, $this->getContentHash( $content ), $user );
	$logContext = [
		'key' => $key,
		'title' => (string)$page,
		'user' => $user->getName()
	];

	// May block for a while if a stash request for the same key is still parsing
	$editInfo = $this->getAndWaitForStashValue( $key );
	if ( !is_object( $editInfo ) || !$editInfo->output ) {
		$this->incrStatsByContent( 'cache_misses.no_stash', $content );
		if ( $this->recentStashEntryCount( $user ) > 0 ) {
			$logger->info( "Empty cache for key '{key}' but not for user.", $logContext );
		} else {
			$logger->debug( "Empty cache for key '{key}'.", $logContext );
		}

		return false;
	}

	$age = time() - (int)wfTimestamp( TS_UNIX, $editInfo->output->getCacheTime() );
	$logContext['age'] = $age;

	$isCacheUsable = true;
	if ( $age <= self::PRESUME_FRESH_TTL_SEC ) {
		// Assume nothing changed in this time
		$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
		$logger->debug( "Timestamp-based cache hit for key '{key}'.", $logContext );
	} elseif ( !$user->isRegistered() ) {
		$lastEdit = $this->lastEditTime( $user );
		$cacheTime = $editInfo->output->getCacheTime();
		if ( $lastEdit < $cacheTime ) {
			// Logged-out user made no local upload/template edits in the meantime
			$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
			$logger->debug( "Edit check based cache hit for key '{key}'.", $logContext );
		} else {
			$isCacheUsable = false;
			$this->incrStatsByContent( 'cache_misses.proven_stale', $content );
			$logger->info( "Stale cache for key '{key}' due to outside edits.", $logContext );
		}
	} else {
		if ( $editInfo->edits === $this->userEditTracker->getUserEditCount( $user ) ) {
			// Logged-in user made no local upload/template edits in the meantime
			$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
			$logger->debug( "Edit count based cache hit for key '{key}'.", $logContext );
		} else {
			$isCacheUsable = false;
			$this->incrStatsByContent( 'cache_misses.proven_stale', $content );
			// Same message template as the logged-out branch so both are grouped together
			$logger->info( "Stale cache for key '{key}' due to outside edits.", $logContext );
		}
	}

	if ( !$isCacheUsable ) {
		return false;
	}

	// The remaining checks only log; they never reject the entry
	if ( $editInfo->output->getOutputFlag( ParserOutputFlags::VARY_REVISION ) ) {
		// This can be used for the initial parse, e.g. for filters or doUserEditContent(),
		// but a second parse will be triggered in doEditUpdates() no matter what
		$logger->info(
			"Cache for key '{key}' has vary-revision; post-insertion parse inevitable.",
			$logContext
		);
	} else {
		static $flagsMaybeReparse = [
			// Similar to the above if we didn't guess the ID correctly
			ParserOutputFlags::VARY_REVISION_ID,
			// Similar to the above if we didn't guess the timestamp correctly
			ParserOutputFlags::VARY_REVISION_TIMESTAMP,
			// Similar to the above if we didn't guess the content correctly
			ParserOutputFlags::VARY_REVISION_SHA1,
			// Similar to the above if we didn't guess page ID correctly
			ParserOutputFlags::VARY_PAGE_ID,
		];
		foreach ( $flagsMaybeReparse as $flag ) {
			if ( $editInfo->output->getOutputFlag( $flag ) ) {
				$logger->debug(
					"Cache for key '{key}' has $flag; post-insertion parse possible.",
					$logContext
				);
			}
		}
	}

	return $editInfo;
}
348 
/**
 * Bump an edit-stash counter twice: once overall and once per content model.
 *
 * @param string $subkey Metric suffix, e.g. 'cache_hits.presumed_fresh'
 * @param Content $content Content whose model name segments the second metric
 */
private function incrStatsByContent( $subkey, Content $content ) {
	// Aggregate counter, kept for backwards compatibility
	$overallMetric = 'editstash.' . $subkey;
	$this->stats->increment( $overallMetric );

	// Same event, broken down by content model
	$model = $content->getModel();
	$perModelMetric = 'editstash_by_model.' . $model . '.' . $subkey;
	$this->stats->increment( $perModelMetric );
}
357 
/**
 * Fetch the stash entry for a key, waiting on an in-progress stash request if needed.
 *
 * When the entry is missing, this tries to take the same named lock that
 * parseAndCache() holds while parsing (waiting up to 30 seconds), then looks the
 * entry up once more. The time spent is reported as 'editstash.lock_wait_time'.
 *
 * @param string $key Stash key from getStashKey()
 * @return object|false Stash entry, or false if none became available
 */
private function getAndWaitForStashValue( $key ) {
	$stashed = $this->getStashValue( $key );
	if ( $stashed ) {
		// Already there, no need to wait on anything
		return $stashed;
	}

	$waitStart = microtime( true );

	// We ignore user aborts and keep parsing. Block on any prior parsing
	// so as to use its results and make use of the time spent parsing.
	// Skip this logic if there no primary connection in case this method
	// is called on an HTTP GET request for some reason.
	$primaryIndex = $this->lb->getWriterIndex();
	$conn = $this->lb->getAnyOpenConnection( $primaryIndex );
	if ( $conn && $conn->lock( $key, __METHOD__, 30 ) ) {
		$stashed = $this->getStashValue( $key );
		$conn->unlock( $key, __METHOD__ );
	}

	$elapsedSec = max( 0, microtime( true ) - $waitStart );
	$this->stats->timing( 'editstash.lock_wait_time', 1000 * $elapsedSec );

	return $stashed;
}
383 
/**
 * Retrieve input text previously saved with stashInputText().
 *
 * @param string $textHash Hash the text was stored under
 * @return mixed Whatever the cache holds for that hash (the cache's miss value if absent)
 */
public function fetchInputText( $textHash ) {
	return $this->cache->get(
		$this->cache->makeKey( 'stashedit', 'text', $textHash )
	);
}
393 
/**
 * Save raw input text in the cache so that it can later be fetched by its hash.
 *
 * The text is kept for at most MAX_CACHE_TTL seconds. Since the text is a string of
 * arbitrary size, the cache is allowed to split it into segments, just like the
 * stash entries written by storeStashValue().
 *
 * @param string $text Text to store
 * @param string $textHash Hash identifying the text; use the same value with fetchInputText()
 * @return bool Result of the cache write
 */
public function stashInputText( $text, $textHash ) {
	$textKey = $this->cache->makeKey( 'stashedit', 'text', $textHash );

	return $this->cache->set(
		$textKey,
		$text,
		self::MAX_CACHE_TTL,
		BagOStuff::WRITE_ALLOW_SEGMENTS
	);
}
409 
/**
 * Find the timestamp of the newest recent change attributed to the user's name.
 *
 * Reads from a replica connection.
 *
 * @param UserIdentity $user
 * @return string|null TS_MW timestamp, or null when wfTimestampOrNull() gets a null input
 */
private function lastEditTime( UserIdentity $user ) {
	$dbr = $this->lb->getConnectionRef( DB_REPLICA );
	$actorConds = [ 'actor_name' => $user->getName() ];

	$newestChange = $dbr->newSelectQueryBuilder()
		->select( 'MAX(rc_timestamp)' )
		->from( 'recentchanges' )
		->join( 'actor', null, 'actor_id=rc_actor' )
		->where( $actorConds )
		->caller( __METHOD__ )
		->fetchField();

	return wfTimestampOrNull( TS_MW, $newestChange );
}
427 
/**
 * Compute the hash that identifies a piece of content within stash keys.
 *
 * The digest covers the content model, the default format and the content
 * serialized in that format, joined by newlines.
 *
 * @param Content $content
 * @return string SHA-1 hex digest
 */
private function getContentHash( Content $content ) {
	$model = $content->getModel();
	$format = $content->getDefaultFormat();
	$serialized = $content->serialize( $content->getDefaultFormat() );

	return sha1( $model . "\n" . $format . "\n" . $serialized );
}
441 
/**
 * Build the cache key of the stash entry for a page/content/user combination.
 *
 * @param PageIdentity $page Supplies namespace and DB key
 * @param string $contentHash Hash from getContentHash()
 * @param UserIdentity $user Supplies user ID and name
 * @return string Cache key
 */
private function getStashKey( PageIdentity $page, $contentHash, UserIdentity $user ) {
	$pageDigest = md5( $page->getNamespace() . "\n" . $page->getDBkey() );
	// Account for user name related variables like signatures
	$userDigest = md5( $user->getId() . "\n" . $user->getName() );

	return $this->cache->makeKey(
		'stashedit-info-v1',
		$pageDigest,
		// Account for the edit model/text
		$contentHash,
		$userDigest
	);
}
464 
/**
 * Read a stash entry from the cache and make sure it has the expected shape.
 *
 * @param string $key Stash key from getStashKey()
 * @return object|false The cached object if its 'output' is a ParserOutput, else false
 */
private function getStashValue( $key ) {
	$cached = $this->cache->get( $key );

	// Anything other than an object carrying a ParserOutput counts as a miss
	if ( !is_object( $cached ) || !( $cached->output instanceof ParserOutput ) ) {
		return false;
	}

	return $cached;
}
477 
/**
 * Write a stash entry to the cache with a suitably bounded TTL.
 *
 * @param string $key Stash key from getStashKey()
 * @param Content $pstContent Content stored as 'pstContent'
 * @param ParserOutput $parserOutput Output stored as 'output'; also determines the TTL
 * @param string $timestamp Stored as 'timestamp'
 * @param UserIdentity $user User whose current edit count is stored as 'edits'
 *   (null for unregistered users)
 * @return string|true True on success, 'uncacheable' if the TTL would be zero or
 *   negative, 'store_error' if the cache write failed
 */
private function storeStashValue(
	$key,
	Content $pstContent,
	ParserOutput $parserOutput,
	$timestamp,
	UserIdentity $user
) {
	// If an item is renewed, mind the cache TTL determined by config and parser functions.
	// Put an upper limit on the TTL to avoid extreme template/file staleness.
	$outputAge = time() - (int)wfTimestamp( TS_UNIX, $parserOutput->getCacheTime() );
	$remainingLifetime = $parserOutput->getCacheExpiry() - $outputAge;
	$ttl = min( $remainingLifetime, self::MAX_CACHE_TTL );

	// Avoid extremely stale user signature timestamps (T84843)
	if ( $parserOutput->getOutputFlag( ParserOutputFlags::USER_SIGNATURE ) ) {
		$ttl = min( $ttl, self::MAX_SIGNATURE_TTL );
	}

	if ( $ttl <= 0 ) {
		// low TTL due to a tag, magic word, or signature?
		return 'uncacheable';
	}

	$editCount = null;
	if ( $user->isRegistered() ) {
		$editCount = $this->userEditTracker->getUserEditCount( $user );
	}

	// Store what is actually needed and split the output into another key (T204742)
	$entry = (object)[
		'pstContent' => $pstContent,
		'output' => $parserOutput,
		'timestamp' => $timestamp,
		'edits' => $editCount,
	];

	if ( !$this->cache->set( $key, $entry, $ttl, BagOStuff::WRITE_ALLOW_SEGMENTS ) ) {
		return 'store_error';
	}

	// These blobs can waste slots in low cardinality memcached slabs
	$this->pruneExcessStashedEntries( $user, $key );

	return true;
}
526 
/**
 * Record a new stash key for the user and drop the oldest entry once the list is full.
 *
 * The per-user key list is kept under a key derived from the user name. If it already
 * holds MAX_CACHE_RECENT keys, the first one is removed and its entry deleted from
 * the cache before the new key is appended.
 *
 * @param UserIdentity $user
 * @param string $newKey Key of the stash entry that was just stored
 */
private function pruneExcessStashedEntries( UserIdentity $user, $newKey ) {
	$listKey = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );

	$recentKeys = $this->cache->get( $listKey );
	if ( !$recentKeys ) {
		$recentKeys = [];
	}

	if ( count( $recentKeys ) >= self::MAX_CACHE_RECENT ) {
		$evictedKey = array_shift( $recentKeys );
		$this->cache->delete( $evictedKey, BagOStuff::WRITE_PRUNE_SEGMENTS );
	}

	$recentKeys[] = $newKey;
	$listTtl = 2 * self::MAX_CACHE_TTL;
	$this->cache->set( $listKey, $recentKeys, $listTtl );
}
543 
/**
 * Count the stash keys currently recorded in the user's recent-key list.
 *
 * @param UserIdentity $user
 * @return int Number of keys in the list; 0 if the list is absent or empty
 */
private function recentStashEntryCount( UserIdentity $user ) {
	$listKey = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );

	$recentKeys = $this->cache->get( $listKey );
	if ( !$recentKeys ) {
		$recentKeys = [];
	}

	return count( $recentKeys );
}
553 }
const EDIT_INTERNAL
Definition: Defines.php:133
wfTimestampOrNull( $outputtype=TS_UNIX, $ts=null)
Return a formatted timestamp, or null if input is null.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Class representing a cache/ephemeral data store.
Definition: BagOStuff.php:85
const WRITE_ALLOW_SEGMENTS
Allow partitioning of the value if it is a large string.
Definition: BagOStuff.php:124
const WRITE_PRUNE_SEGMENTS
Delete all the segments if the value is partitioned.
Definition: BagOStuff.php:126
getCacheExpiry()
Returns the number of seconds after which this object should expire.
Definition: CacheTime.php:144
getCacheTime()
Definition: CacheTime.php:67
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Definition: HookRunner.php:564
Service for creating WikiPage objects.
Value object representing a content slot associated with a page revision.
Definition: SlotRecord.php:40
Manage the pre-emptive page parsing for edits to wiki pages.
checkCache(PageIdentity $page, Content $content, UserIdentity $user)
Check that a prepared edit is in cache and still up-to-date.
parseAndCache( $pageUpdater, Content $content, UserIdentity $user, string $summary)
__construct(BagOStuff $cache, ILoadBalancer $lb, LoggerInterface $logger, StatsdDataFactoryInterface $stats, UserEditTracker $userEditTracker, UserFactory $userFactory, WikiPageFactory $wikiPageFactory, HookContainer $hookContainer, $initiator)
stashInputText( $text, $textHash)
Track info about user edit counts and timings.
Creates User objects.
Definition: UserFactory.php:38
getOutputFlag(string $name)
Provides a uniform interface to various boolean flags stored in the ParserOutput.
Base representation for an editable wiki page.
Definition: WikiPage.php:62
Base interface for content objects.
Definition: Content.php:35
Interface for objects (potentially) representing an editable wiki page.
This is a hook handler interface, see docs/Hooks.md.
Interface for objects representing user identity.
Create and track the database connections and transactions for a given database cluster.
$cache
Definition: mcc.php:33
const DB_REPLICA
Definition: defines.php:26
const DB_PRIMARY
Definition: defines.php:28
$content
Definition: router.php:76
return true
Definition: router.php:90