MediaWiki  master
PageEditStash.php
Go to the documentation of this file.
1 <?php
21 namespace MediaWiki\Storage;
22 
23 use BagOStuff;
24 use Content;
25 use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
36 use ParserOutput;
37 use Psr\Log\LoggerInterface;
38 use stdClass;
40 use Wikimedia\ScopedCallback;
41 use WikiPage;
42 
// --- Collaborators injected through the constructor ---

/** @var BagOStuff Cache backend that holds stash entries, stashed input text and per-user recent-key lists */
56  private $cache;
/** @var IConnectionProvider Source of the primary DB (named locks) and replica DB (recent changes lookup) */
58  private $dbProvider;
/** @var LoggerInterface */
60  private $logger;
/** @var StatsdDataFactoryInterface Receives the "editstash.*" counters and timings */
62  private $stats;
/** @var HookRunner Built from the HookContainer passed to the constructor */
64  private $hookRunner;
/** @var UserEditTracker Supplies the user edit count stored with, and compared against, stash entries */
66  private $userEditTracker;
/** @var UserFactory */
68  private $userFactory;
/** @var WikiPageFactory */
70  private $wikiPageFactory;
/** @var int Expected to be one of the INITIATOR_* constants; checkCache() only proceeds for INITIATOR_USER */
72  private $initiator;
73 
// --- Status strings returned by parseAndCache() ---

/** Parser output was stashed, or a fresh matching entry already existed */
74  public const ERROR_NONE = 'stashed';
/** No parser output was available to stash */
75  public const ERROR_PARSE = 'error_parse';
/** Writing the stash entry to the cache failed */
76  public const ERROR_CACHE = 'error_cache';
/** Status for parser output that cannot be cached (storeStashValue() reports this case when the computed TTL is not positive) */
77  public const ERROR_UNCACHEABLE = 'uncacheable';
/** Another request already holds the lock for the same stash key */
78  public const ERROR_BUSY = 'busy';
79 
/** Seconds during which a stash entry is presumed fresh without further checks */
80  public const PRESUME_FRESH_TTL_SEC = 30;
/** Upper bound, in seconds, on the TTL of stash entries and stashed input text */
81  public const MAX_CACHE_TTL = 300; // 5 minutes
/** Tighter TTL cap, in seconds, applied when the output carries the USER_SIGNATURE flag */
82  public const MAX_SIGNATURE_TTL = 60;
83 
/** Length of the per-user recent-key list at which the oldest stash entry is evicted */
84  private const MAX_CACHE_RECENT = 2;
85 
// --- Values for the constructor's $initiator argument ---

/** The request was initiated by a user */
86  public const INITIATOR_USER = 1;
/** The request comes from a job runner or CLI script */
87  public const INITIATOR_JOB_OR_CLI = 2;
88 
/**
 * Store the collaborators used to stash and look up pre-parsed edits.
 *
 * @param BagOStuff $cache Cache backend for the stash entries
 * @param IConnectionProvider $dbProvider Source of primary/replica database handles
 * @param LoggerInterface $logger
 * @param StatsdDataFactoryInterface $stats
 * @param UserEditTracker $userEditTracker
 * @param UserFactory $userFactory
 * @param WikiPageFactory $wikiPageFactory
 * @param HookContainer $hookContainer Wrapped in a HookRunner for typed hook calls
 * @param int $initiator Expected to be one of the INITIATOR_* constants
 */
public function __construct(
	BagOStuff $cache,
	IConnectionProvider $dbProvider,
	LoggerInterface $logger,
	StatsdDataFactoryInterface $stats,
	UserEditTracker $userEditTracker,
	UserFactory $userFactory,
	WikiPageFactory $wikiPageFactory,
	HookContainer $hookContainer,
	$initiator
) {
	// Request context
	$this->initiator = $initiator;

	// Storage backends
	$this->cache = $cache;
	$this->dbProvider = $dbProvider;

	// Observability
	$this->logger = $logger;
	$this->stats = $stats;

	// Domain services
	$this->hookRunner = new HookRunner( $hookContainer );
	$this->userEditTracker = $userEditTracker;
	$this->userFactory = $userFactory;
	$this->wikiPageFactory = $wikiPageFactory;
}
121 
/**
 * Parse the given content ahead of an expected save and stash the result in the cache.
 *
 * A non-blocking named lock on the stash key de-duplicates concurrent requests for
 * the same page/content/user bundle. A matching entry that is at most
 * PRESUME_FRESH_TTL_SEC seconds old is reused instead of parsing again.
 *
 * @param WikiPage|object $pageUpdater Either a WikiPage (converted through
 *  newPageUpdater()) or an updater exposing getPage(), setContent() and prepareUpdate()
 * @param Content $content Content to parse
 * @param UserIdentity $user User the edit would be attributed to
 * @param string $summary Edit summary, passed on to the ParserOutputStashForEdit hook
 * @return string One of the ERROR_* class constants
 */
public function parseAndCache( $pageUpdater, Content $content, UserIdentity $user, string $summary ) {
	$logger = $this->logger;

	if ( $pageUpdater instanceof WikiPage ) {
		// TODO: Trigger deprecation warning once extensions have been fixed.
		// Or better, create PageUpdater::prepareAndStash and deprecate this method.
		$pageUpdater = $pageUpdater->newPageUpdater( $user );
	}

	$page = $pageUpdater->getPage();
	$key = $this->getStashKey( $page, $this->getContentHash( $content ), $user );
	$fname = __METHOD__;

	// Use the primary DB to allow for fast blocking locks on the "save path" where this
	// value might actually be used to complete a page edit. If the edit submission request
	// happens before this edit stash requests finishes, then the submission will block until
	// the stash request finishes parsing. For the lock acquisition below, there is not much
	// need to duplicate parsing of the same content/user/summary bundle, so try to avoid
	// blocking at all here.
	$dbw = $this->dbProvider->getPrimaryDatabase();
	if ( !$dbw->lock( $key, $fname, 0 ) ) {
		// De-duplicate requests on the same key
		return self::ERROR_BUSY;
	}
	// Never read again on purpose: the callback releases the lock once this
	// variable goes out of scope, whichever return path is taken.
	$unlocker = new ScopedCallback( static function () use ( $dbw, $key, $fname ) {
		$dbw->unlock( $key, $fname );
	} );

	$cutoffTime = time() - self::PRESUME_FRESH_TTL_SEC;

	// Reuse any freshly built matching edit stash cache
	$editInfo = $this->getStashValue( $key );
	if ( $editInfo && (int)wfTimestamp( TS_UNIX, $editInfo->timestamp ) >= $cutoffTime ) {
		$alreadyCached = true;
	} else {
		$pageUpdater->setContent( SlotRecord::MAIN, $content );

		$update = $pageUpdater->prepareUpdate( EDIT_INTERNAL ); // applies pre-save transform
		$output = $update->getCanonicalParserOutput(); // causes content to be parsed
		$output->setCacheTime( $update->getRevision()->getTimestamp() );

		// emulate a cache value that kind of looks like a PreparedEdit, for use below
		$editInfo = (object)[
			'pstContent' => $update->getRawContent( SlotRecord::MAIN ),
			'output' => $output,
			'timestamp' => $output->getCacheTime()
		];

		$alreadyCached = false;
	}

	if ( !$editInfo->output ) {
		// Nothing was parsed, so there is nothing to stash
		return self::ERROR_PARSE;
	}

	$logContext = [ 'cachekey' => $key, 'title' => (string)$page ];

	// Let extensions add ParserOutput metadata or warm other caches
	$legacyUser = $this->userFactory->newFromUserIdentity( $user );
	$legacyPage = $this->wikiPageFactory->newFromTitle( $page );
	$this->hookRunner->onParserOutputStashForEdit(
		$legacyPage, $content, $editInfo->output, $summary, $legacyUser );

	if ( $alreadyCached ) {
		$logger->debug( "Parser output for key '{cachekey}' already cached.", $logContext );

		return self::ERROR_NONE;
	}

	$code = $this->storeStashValue(
		$key,
		$editInfo->pstContent,
		$editInfo->output,
		$editInfo->timestamp,
		$user
	);

	if ( $code === true ) {
		$logger->debug( "Cached parser output for key '{cachekey}'.", $logContext );

		return self::ERROR_NONE;
	}

	if ( $code === 'uncacheable' ) {
		$logger->info(
			"Uncacheable parser output for key '{cachekey}' [{code}].",
			$logContext + [ 'code' => $code ]
		);

		// Report the dedicated status instead of falling through to ERROR_PARSE:
		// parsing succeeded, the output just cannot be kept.
		return self::ERROR_UNCACHEABLE;
	}

	$logger->error(
		"Failed to cache parser output for key '{cachekey}'.",
		$logContext + [ 'code' => $code ]
	);

	return self::ERROR_CACHE;
}
227 
/**
 * Look up the stashed edit for this page/content/user and decide whether it is still usable.
 *
 * The lookup is skipped unless the request is a user-initiated HTTP POST from a
 * non-bot account. An entry is accepted when it is at most PRESUME_FRESH_TTL_SEC
 * seconds old; otherwise it is accepted only if the user shows no activity since it
 * was stashed (latest recent change for logged-out users, edit count for registered users).
 *
 * @param PageIdentity $page
 * @param Content $content Content being saved
 * @param UserIdentity $user User performing the save
 * @return object|false The stash entry (pstContent, output, timestamp, edits), or false
 *  when there is no usable entry
 */
public function checkCache( PageIdentity $page, Content $content, UserIdentity $user ) {
	$legacyUser = $this->userFactory->newFromUserIdentity( $user );
	if (
		// The context is not an HTTP POST request
		!$legacyUser->getRequest()->wasPosted() ||
		// The context is a CLI script or a job runner HTTP POST request
		$this->initiator !== self::INITIATOR_USER ||
		// The editor account is a known bot
		$legacyUser->isBot()
	) {
		// Avoid wasted queries and statsd pollution
		return false;
	}

	$logger = $this->logger;

	$key = $this->getStashKey( $page, $this->getContentHash( $content ), $user );
	$logContext = [
		'key' => $key,
		'title' => (string)$page,
		'user' => $user->getName()
	];

	$editInfo = $this->getAndWaitForStashValue( $key );
	if ( !is_object( $editInfo ) || !$editInfo->output ) {
		$this->incrStatsByContent( 'cache_misses.no_stash', $content );
		if ( $this->recentStashEntryCount( $user ) > 0 ) {
			// The user stashed something recently, just not under this key
			$logger->info( "Empty cache for key '{key}' but not for user.", $logContext );
		} else {
			$logger->debug( "Empty cache for key '{key}'.", $logContext );
		}

		return false;
	}

	$age = time() - (int)wfTimestamp( TS_UNIX, $editInfo->output->getCacheTime() );
	$logContext['age'] = $age;

	$isCacheUsable = true;
	if ( $age <= self::PRESUME_FRESH_TTL_SEC ) {
		// Assume nothing changed in this time
		$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
		$logger->debug( "Timestamp-based cache hit for key '{key}'.", $logContext );
	} elseif ( !$user->isRegistered() ) {
		$lastEdit = $this->lastEditTime( $user );
		$cacheTime = $editInfo->output->getCacheTime();
		if ( $lastEdit < $cacheTime ) {
			// Logged-out user made no local upload/template edits in the meantime
			$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
			$logger->debug( "Edit check based cache hit for key '{key}'.", $logContext );
		} else {
			$isCacheUsable = false;
			$this->incrStatsByContent( 'cache_misses.proven_stale', $content );
			$logger->info( "Stale cache for key '{key}' due to outside edits.", $logContext );
		}
	} else {
		if ( $editInfo->edits === $this->userEditTracker->getUserEditCount( $user ) ) {
			// Logged-in user made no local upload/template edits in the meantime
			$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
			$logger->debug( "Edit count based cache hit for key '{key}'.", $logContext );
		} else {
			$isCacheUsable = false;
			$this->incrStatsByContent( 'cache_misses.proven_stale', $content );
			// Message kept identical to the logged-out branch above (the space
			// before "due" used to be missing here).
			$logger->info( "Stale cache for key '{key}' due to outside edits.", $logContext );
		}
	}

	if ( !$isCacheUsable ) {
		return false;
	}

	if ( $editInfo->output->getOutputFlag( ParserOutputFlags::VARY_REVISION ) ) {
		// This can be used for the initial parse, e.g. for filters or doUserEditContent(),
		// but a second parse will be triggered in doEditUpdates() no matter what
		$logger->info(
			"Cache for key '{key}' has vary-revision; post-insertion parse inevitable.",
			$logContext
		);
	} else {
		static $flagsMaybeReparse = [
			// Similar to the above if we didn't guess the ID correctly
			ParserOutputFlags::VARY_REVISION_ID,
			// Similar to the above if we didn't guess the timestamp correctly
			ParserOutputFlags::VARY_REVISION_TIMESTAMP,
			// Similar to the above if we didn't guess the content correctly
			ParserOutputFlags::VARY_REVISION_SHA1,
			// Similar to the above if we didn't guess page ID correctly
			ParserOutputFlags::VARY_PAGE_ID,
		];
		foreach ( $flagsMaybeReparse as $flag ) {
			if ( $editInfo->output->getOutputFlag( $flag ) ) {
				$logger->debug(
					"Cache for key '{key}' has $flag; post-insertion parse possible.",
					$logContext
				);
			}
		}
	}

	return $editInfo;
}
349 
/**
 * Bump a stash counter twice: once overall and once per content model.
 *
 * @param string $subkey Metric suffix, e.g. "cache_misses.no_stash"
 * @param Content $content Content whose model name goes into the per-model metric
 */
private function incrStatsByContent( $subkey, Content $content ) {
	// Overall counter, kept for backwards compatibility
	$overallMetric = 'editstash.' . $subkey;
	$this->stats->increment( $overallMetric );

	// Per-model breakdown
	$modelMetric = 'editstash_by_model.' . $content->getModel() . '.' . $subkey;
	$this->stats->increment( $modelMetric );
}
358 
/**
 * Fetch a stash entry, waiting for an in-progress parse of the same key if necessary.
 *
 * When the entry is missing, this blocks for up to 30 seconds on the named lock that
 * parseAndCache() holds while parsing, then reads the cache again. The time spent
 * waiting is reported as the "editstash.lock_wait_time" timing.
 *
 * @param string $key Stash key, as built by getStashKey()
 * @return object|false The stash entry, or false if none became available
 */
private function getAndWaitForStashValue( $key ) {
	$editInfo = $this->getStashValue( $key );
	if ( $editInfo ) {
		return $editInfo;
	}

	$start = microtime( true );
	// We ignore user aborts and keep parsing. Block on any prior parsing
	// so as to use its results and make use of the time spent parsing.
	$dbw = $this->dbProvider->getPrimaryDatabase();
	if ( $dbw->lock( $key, __METHOD__, 30 ) ) {
		try {
			$editInfo = $this->getStashValue( $key );
		} finally {
			// Release the lock even if the cache read throws, so it does not
			// stay held for the rest of the connection's lifetime.
			$dbw->unlock( $key, __METHOD__ );
		}
	}

	$timeMs = 1000 * max( 0, microtime( true ) - $start );
	$this->stats->timing( 'editstash.lock_wait_time', $timeMs );

	return $editInfo;
}
382 
/**
 * Read back input text previously saved with stashInputText().
 *
 * @param string $textHash Hash identifying the stashed text
 * @return mixed Whatever the cache returns for the key built from $textHash
 */
public function fetchInputText( $textHash ) {
	return $this->cache->get(
		$this->cache->makeKey( 'stashedit', 'text', $textHash )
	);
}
392 
/**
 * Save raw input text in the cache for up to MAX_CACHE_TTL seconds.
 *
 * @param string $text Text to stash
 * @param string $textHash Hash identifying the text; fetchInputText() takes the same value
 * @return mixed Result of the cache write (presumably a success boolean; see BagOStuff::set())
 */
public function stashInputText( $text, $textHash ) {
	$textKey = $this->cache->makeKey( 'stashedit', 'text', $textHash );

	return $this->cache->set(
		$textKey,
		$text,
		self::MAX_CACHE_TTL,
		// Edit text can be large; let the backend partition it, as
		// storeStashValue() does for the parser output blob.
		BagOStuff::WRITE_ALLOW_SEGMENTS
	);
}
408 
/**
 * Find the timestamp of the user's latest entry in recent changes.
 *
 * @param UserIdentity $user
 * @return string|null TS_MW timestamp, or null when the query yields no timestamp
 */
private function lastEditTime( UserIdentity $user ) {
	$dbr = $this->dbProvider->getReplicaDatabase();

	$query = $dbr->newSelectQueryBuilder()
		->select( 'MAX(rc_timestamp)' )
		->from( 'recentchanges' )
		->join( 'actor', null, 'actor_id=rc_actor' )
		->where( [ 'actor_name' => $user->getName() ] )
		->caller( __METHOD__ );
	$latest = $query->fetchField();

	return wfTimestampOrNull( TS_MW, $latest );
}
424 
/**
 * Compute a SHA-1 digest over the content's model, default format and serialized text.
 *
 * @param Content $content
 * @return string Hex SHA-1 digest
 */
private function getContentHash( Content $content ) {
	$model = $content->getModel();
	$format = $content->getDefaultFormat();
	$blob = $content->serialize( $content->getDefaultFormat() );

	// Newline-separated so the three parts cannot run into each other
	$material = $model . "\n" . $format . "\n" . $blob;

	return sha1( $material );
}
438 
/**
 * Build the cache key under which a stash entry for this page/content/user is kept.
 *
 * @param PageIdentity $page
 * @param string $contentHash Digest from getContentHash()
 * @param UserIdentity $user
 * @return string Cache key
 */
private function getStashKey( PageIdentity $page, $contentHash, UserIdentity $user ) {
	// Identifies the page by namespace and DB key
	$pageDigest = md5( $page->getNamespace() . "\n" . $page->getDBkey() );
	// Accounts for user name related variables like signatures
	$userDigest = md5( $user->getId() . "\n" . $user->getName() );

	// $contentHash accounts for the edit model/text
	return $this->cache->makeKey( 'stashedit-info-v2', $pageDigest, $contentHash, $userDigest );
}
461 
/**
 * Load and decode the stash entry stored under the given key.
 *
 * @param string $key Stash key
 * @return object|false Decoded entry, or false if it is missing or invalid
 */
private function getStashValue( $key ) {
	return $this->unserializeStashInfo( $this->cache->get( $key ) );
}
471 
/**
 * Write a stash entry to the cache with a TTL derived from the parser output.
 *
 * @param string $key Stash key
 * @param Content $pstContent Content after the pre-save transform
 * @param ParserOutput $parserOutput Rendered output to stash
 * @param string $timestamp Timestamp recorded with the entry
 * @param UserIdentity $user User the edit count and recent-key list belong to
 * @return string|true True on success, otherwise "uncacheable" or "store_error"
 */
private function storeStashValue(
	$key,
	Content $pstContent,
	ParserOutput $parserOutput,
	$timestamp,
	UserIdentity $user
) {
	// If an item is renewed, mind the cache TTL determined by config and parser functions.
	// Put an upper limit on the TTL to avoid extreme template/file staleness.
	$now = time();
	$elapsed = $now - (int)wfTimestamp( TS_UNIX, $parserOutput->getCacheTime() );
	$ttlCaps = [ $parserOutput->getCacheExpiry() - $elapsed, self::MAX_CACHE_TTL ];
	if ( $parserOutput->getOutputFlag( ParserOutputFlags::USER_SIGNATURE ) ) {
		// Avoid extremely stale user signature timestamps (T84843)
		$ttlCaps[] = self::MAX_SIGNATURE_TTL;
	}
	$ttl = min( $ttlCaps );

	if ( $ttl <= 0 ) {
		// low TTL due to a tag, magic word, or signature?
		return 'uncacheable';
	}

	// Store what is actually needed and split the output into another key (T204742)
	$serial = $this->serializeStashInfo( (object)[
		'pstContent' => $pstContent,
		'output' => $parserOutput,
		'timestamp' => $timestamp,
		'edits' => $this->userEditTracker->getUserEditCount( $user ),
	] );
	if ( $serial === false ) {
		return 'store_error';
	}

	if ( !$this->cache->set( $key, $serial, $ttl, BagOStuff::WRITE_ALLOW_SEGMENTS ) ) {
		return 'store_error';
	}

	// These blobs can waste slots in low cardinality memcached slabs
	$this->pruneExcessStashedEntries( $user, $key );

	return true;
}
524 
/**
 * Record a new stash key for the user, evicting the oldest stash entry once the
 * user's recent-key list has reached MAX_CACHE_RECENT items.
 *
 * @param UserIdentity $user
 * @param string $newKey Stash key that was just written
 */
private function pruneExcessStashedEntries( UserIdentity $user, $newKey ) {
	$listKey = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );

	$recentKeys = $this->cache->get( $listKey );
	if ( !$recentKeys ) {
		$recentKeys = [];
	}

	if ( count( $recentKeys ) >= self::MAX_CACHE_RECENT ) {
		// Drop the oldest tracked entry, including any segments it was split into
		$evictedKey = array_shift( $recentKeys );
		$this->cache->delete( $evictedKey, BagOStuff::WRITE_PRUNE_SEGMENTS );
	}

	$recentKeys[] = $newKey;
	$listTtl = 2 * self::MAX_CACHE_TTL;
	$this->cache->set( $listKey, $recentKeys, $listTtl );
}
541 
/**
 * Count the stash keys currently tracked in the user's recent-key list.
 *
 * @param UserIdentity $user
 * @return int Number of tracked keys; 0 when the list is missing or empty
 */
private function recentStashEntryCount( UserIdentity $user ) {
	$listKey = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );
	$recentKeys = $this->cache->get( $listKey );

	if ( !$recentKeys ) {
		$recentKeys = [];
	}

	return count( $recentKeys );
}
551 
/**
 * Encode a stash entry for storage in the cache.
 *
 * @param stdClass $stashInfo Entry to encode
 * @return string PHP-serialized representation
 */
private function serializeStashInfo( stdClass $stashInfo ) {
	// @todo: use JSON with ParserOutput and Content
	$encoded = serialize( $stashInfo );

	return $encoded;
}
556 
/**
 * Decode a stash entry read from the cache.
 *
 * @param mixed $serial Raw cache value; anything other than a string is rejected
 * @return object|false Decoded entry whose "output" is a ParserOutput, or false otherwise
 */
private function unserializeStashInfo( $serial ) {
	if ( !is_string( $serial ) ) {
		return false;
	}

	// @todo: use JSON with ParserOutput and Content
	$decoded = unserialize( $serial );
	$isValid = is_object( $decoded ) && $decoded->output instanceof ParserOutput;

	return $isValid ? $decoded : false;
}
568 }
const EDIT_INTERNAL
Definition: Defines.php:133
wfTimestampOrNull( $outputtype=TS_UNIX, $ts=null)
Return a formatted timestamp, or null if input is null.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Class representing a cache/ephemeral data store.
Definition: BagOStuff.php:85
const WRITE_ALLOW_SEGMENTS
Allow partitioning of the value if it is a large string.
Definition: BagOStuff.php:119
const WRITE_PRUNE_SEGMENTS
Delete all the segments if the value is partitioned.
Definition: BagOStuff.php:121
getCacheExpiry()
Returns the number of seconds after which this object should expire.
Definition: CacheTime.php:145
getCacheTime()
Definition: CacheTime.php:68
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Definition: HookRunner.php:567
Service for creating WikiPage objects.
Value object representing a content slot associated with a page revision.
Definition: SlotRecord.php:40
Manage the pre-emptive page parsing for edits to wiki pages.
checkCache(PageIdentity $page, Content $content, UserIdentity $user)
Check that a prepared edit is in cache and still up-to-date.
parseAndCache( $pageUpdater, Content $content, UserIdentity $user, string $summary)
__construct(BagOStuff $cache, IConnectionProvider $dbProvider, LoggerInterface $logger, StatsdDataFactoryInterface $stats, UserEditTracker $userEditTracker, UserFactory $userFactory, WikiPageFactory $wikiPageFactory, HookContainer $hookContainer, $initiator)
stashInputText( $text, $textHash)
Track info about user edit counts and timings.
Creates User objects.
Definition: UserFactory.php:41
getOutputFlag(string $name)
Provides a uniform interface to various boolean flags stored in the ParserOutput.
Base representation for an editable wiki page.
Definition: WikiPage.php:77
Base interface for representing page content.
Definition: Content.php:39
Interface for objects (potentially) representing an editable wiki page.
This is a hook handler interface, see docs/Hooks.md.
Interface for objects representing user identity.
isRegistered()
This must be equivalent to getId() != 0 and is provided for code readability.
Provide primary and replica IDatabase connections.
$content
Definition: router.php:76
return true
Definition: router.php:90