MediaWiki  master
PageEditStash.php
Go to the documentation of this file.
1 <?php
23 namespace MediaWiki\Storage;
24 
25 use BagOStuff;
26 use Content;
27 use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
35 use ParserOutput;
36 use Psr\Log\LoggerInterface;
37 use stdClass;
38 use User;
40 use Wikimedia\ScopedCallback;
41 use WikiPage;
42 
/** @var BagOStuff Cache backend holding stashed edits and stashed input text */
private $cache;
/** @var ILoadBalancer Used for named locks on the primary DB and replica reads */
private $lb;
/** @var LoggerInterface */
private $logger;
/** @var StatsdDataFactoryInterface */
private $stats;
/** @var ParserOutputStashForEditHook */
private $hookRunner;
/**
 * Declared explicitly: the constructor assigns it and both checkCache() and
 * storeStashValue() read it, so it must not be created as a dynamic property.
 *
 * @var UserEditTracker
 */
private $userEditTracker;
/** @var UserFactory */
private $userFactory;
/** @var int Compared against INITIATOR_USER in checkCache() */
private $initiator;

// Status codes returned by parseAndCache()
public const ERROR_NONE = 'stashed';
public const ERROR_PARSE = 'error_parse';
public const ERROR_CACHE = 'error_cache';
public const ERROR_UNCACHEABLE = 'uncacheable';
public const ERROR_BUSY = 'busy';

// Stash entries no older than this many seconds are reused without further checks
public const PRESUME_FRESH_TTL_SEC = 30;
// Upper bound on the cache TTL of stashed values
public const MAX_CACHE_TTL = 300; // 5 minutes
// Upper bound on the cache TTL of output flagged with 'user-signature'
public const MAX_SIGNATURE_TTL = 60;

// Number of tracked stash keys per user at which the oldest one gets pruned
private const MAX_CACHE_RECENT = 2;

// Values for the $initiator constructor argument
public const INITIATOR_USER = 1;
public const INITIATOR_JOB_OR_CLI = 2;
80 
/**
 * Every parameter that the body assigns from is declared here; previously
 * $cache, $lb, $userEditTracker, $userFactory and $initiator were read without
 * being parameters. The added type hints are fully qualified so that they do
 * not depend on additional `use` statements.
 *
 * @param BagOStuff $cache
 * @param \Wikimedia\Rdbms\ILoadBalancer $lb
 * @param LoggerInterface $logger
 * @param StatsdDataFactoryInterface $stats
 * @param \MediaWiki\User\UserEditTracker $userEditTracker
 * @param \MediaWiki\User\UserFactory $userFactory
 * @param HookContainer $hookContainer Wrapped in a HookRunner
 * @param int $initiator Expected to be one of the INITIATOR_* class constants
 */
public function __construct(
	BagOStuff $cache,
	\Wikimedia\Rdbms\ILoadBalancer $lb,
	LoggerInterface $logger,
	StatsdDataFactoryInterface $stats,
	\MediaWiki\User\UserEditTracker $userEditTracker,
	\MediaWiki\User\UserFactory $userFactory,
	HookContainer $hookContainer,
	$initiator
) {
	$this->cache = $cache;
	$this->lb = $lb;
	$this->logger = $logger;
	$this->stats = $stats;
	$this->userEditTracker = $userEditTracker;
	$this->userFactory = $userFactory;
	$this->hookRunner = new HookRunner( $hookContainer );
	$this->initiator = $initiator;
}
110 
/**
 * Prepare the given content for an edit and stash the result in cache, so that
 * a later save of the same page/content/user bundle can reuse it.
 *
 * @param WikiPage $page
 * @param Content $content Edit content
 * @param UserIdentity $user
 * @param string $summary Edit summary, passed on to the ParserOutputStashForEdit hook
 * @return string One of the ERROR_* class constants (ERROR_NONE on success)
 */
public function parseAndCache( WikiPage $page, Content $content, UserIdentity $user, string $summary ) {
	$logger = $this->logger;

	$key = $this->getStashKey( $page, $this->getContentHash( $content ), $user );
	$fname = __METHOD__;

	// Use the primary DB to allow for fast blocking locks on the "save path" where this
	// value might actually be used to complete a page edit. If the edit submission request
	// happens before this edit stash request finishes, then the submission will block until
	// the stash request finishes parsing. For the lock acquisition below, there is not much
	// need to duplicate parsing of the same content/user/summary bundle, so try to avoid
	// blocking at all here.
	$dbw = $this->lb->getConnectionRef( DB_PRIMARY );
	if ( !$dbw->lock( $key, $fname, 0 ) ) {
		// De-duplicate requests on the same key
		return self::ERROR_BUSY;
	}
	// Not read again on purpose: the callback is kept in a local so that the lock is
	// released once this method returns, whichever return statement is taken.
	$unlocker = new ScopedCallback( static function () use ( $dbw, $key, $fname ) {
		$dbw->unlock( $key, $fname );
	} );

	$cutoffTime = time() - self::PRESUME_FRESH_TTL_SEC;

	// Reuse any freshly built matching edit stash cache
	$editInfo = $this->getStashValue( $key );
	if ( $editInfo && wfTimestamp( TS_UNIX, $editInfo->timestamp ) >= $cutoffTime ) {
		$alreadyCached = true;
	} else {
		$format = $content->getDefaultFormat();
		$editInfo = $page->prepareContentForEdit( $content, null, $user, $format, false );
		// Only stamp the output when there is one; a missing output is reported
		// as ERROR_PARSE below instead of failing on a method call here.
		if ( $editInfo && $editInfo->output ) {
			$editInfo->output->setCacheTime( $editInfo->timestamp );
		}
		$alreadyCached = false;
	}

	$logContext = [ 'cachekey' => $key, 'title' => (string)$page ];

	if ( $editInfo && $editInfo->output ) {
		// Let extensions add ParserOutput metadata or warm other caches
		$legacyUser = $this->userFactory->newFromUserIdentity( $user );
		$this->hookRunner->onParserOutputStashForEdit(
			$page, $content, $editInfo->output, $summary, $legacyUser );

		if ( $alreadyCached ) {
			$logger->debug( "Parser output for key '{cachekey}' already cached.", $logContext );

			return self::ERROR_NONE;
		}

		$code = $this->storeStashValue(
			$key,
			$editInfo->pstContent,
			$editInfo->output,
			$editInfo->timestamp,
			$user
		);

		if ( $code === true ) {
			$logger->debug( "Cached parser output for key '{cachekey}'.", $logContext );

			return self::ERROR_NONE;
		} elseif ( $code === 'uncacheable' ) {
			$logger->info(
				"Uncacheable parser output for key '{cachekey}' [{code}].",
				$logContext + [ 'code' => $code ]
			);

			// Report the dedicated status rather than falling through to ERROR_PARSE
			return self::ERROR_UNCACHEABLE;
		} else {
			$logger->error(
				"Failed to cache parser output for key '{cachekey}'.",
				$logContext + [ 'code' => $code ]
			);

			return self::ERROR_CACHE;
		}
	}

	return self::ERROR_PARSE;
}
198 
/**
 * Check that a prepared edit is in cache and still up-to-date.
 *
 * The lookup is skipped entirely (returning false) unless the request was
 * posted, the initiator is INITIATOR_USER and the user is not a bot.
 * A stash entry older than PRESUME_FRESH_TTL_SEC is only accepted if the user
 * made no edits since it was stored: judged by the last recent change time for
 * anonymous users and by the edit count for registered users.
 *
 * @param PageIdentity $page
 * @param Content $content
 * @param User $user
 * @return stdClass|false The stashed edit info, or false if unavailable or stale
 */
public function checkCache( PageIdentity $page, Content $content, User $user ) {
	if (
		// The context is not an HTTP POST request
		!$user->getRequest()->wasPosted() ||
		// The context is a CLI script or a job runner HTTP POST request
		$this->initiator !== self::INITIATOR_USER ||
		// The editor account is a known bot
		$user->isBot()
	) {
		// Avoid wasted queries and statsd pollution
		return false;
	}

	$logger = $this->logger;

	$key = $this->getStashKey( $page, $this->getContentHash( $content ), $user );
	$logContext = [
		'key' => $key,
		'title' => (string)$page,
		'user' => $user->getName()
	];

	$editInfo = $this->getAndWaitForStashValue( $key );
	if ( !is_object( $editInfo ) || !$editInfo->output ) {
		$this->incrStatsByContent( 'cache_misses.no_stash', $content );
		if ( $this->recentStashEntryCount( $user ) > 0 ) {
			$logger->info( "Empty cache for key '{key}' but not for user.", $logContext );
		} else {
			$logger->debug( "Empty cache for key '{key}'.", $logContext );
		}

		return false;
	}

	$age = time() - (int)wfTimestamp( TS_UNIX, $editInfo->output->getCacheTime() );
	$logContext['age'] = $age;

	$isCacheUsable = true;
	if ( $age <= self::PRESUME_FRESH_TTL_SEC ) {
		// Assume nothing changed in this time
		$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
		$logger->debug( "Timestamp-based cache hit for key '{key}'.", $logContext );
	} elseif ( $user->isAnon() ) {
		$lastEdit = $this->lastEditTime( $user );
		$cacheTime = $editInfo->output->getCacheTime();
		if ( $lastEdit < $cacheTime ) {
			// Logged-out user made no local upload/template edits in the meantime
			$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
			$logger->debug( "Edit check based cache hit for key '{key}'.", $logContext );
		} else {
			$isCacheUsable = false;
			$this->incrStatsByContent( 'cache_misses.proven_stale', $content );
			$logger->info( "Stale cache for key '{key}' due to outside edits.", $logContext );
		}
	} else {
		if ( $editInfo->edits === $this->userEditTracker->getUserEditCount( $user ) ) {
			// Logged-in user made no local upload/template edits in the meantime
			$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
			$logger->debug( "Edit count based cache hit for key '{key}'.", $logContext );
		} else {
			$isCacheUsable = false;
			$this->incrStatsByContent( 'cache_misses.proven_stale', $content );
			$logger->info( "Stale cache for key '{key}' due to outside edits.", $logContext );
		}
	}

	if ( !$isCacheUsable ) {
		return false;
	}

	if ( $editInfo->output->getFlag( 'vary-revision' ) ) {
		// This can be used for the initial parse, e.g. for filters or doUserEditContent(),
		// but a second parse will be triggered in doEditUpdates() no matter what
		$logger->info(
			"Cache for key '{key}' has vary-revision; post-insertion parse inevitable.",
			$logContext
		);
	} else {
		static $flagsMaybeReparse = [
			// Similar to the above if we didn't guess the ID correctly
			'vary-revision-id',
			// Similar to the above if we didn't guess the timestamp correctly
			'vary-revision-timestamp',
			// Similar to the above if we didn't guess the content correctly
			'vary-revision-sha1',
			// Similar to the above if we didn't guess page ID correctly
			'vary-page-id'
		];
		foreach ( $flagsMaybeReparse as $flag ) {
			if ( $editInfo->output->getFlag( $flag ) ) {
				$logger->debug(
					"Cache for key '{key}' has $flag; post-insertion parse possible.",
					$logContext
				);
			}
		}
	}

	return $editInfo;
}
319 
/**
 * Bump a stash counter twice: once globally and once per content model.
 *
 * @param string $subkey Metric suffix, e.g. 'cache_misses.no_stash'
 * @param Content $content Supplies the content model used in the second metric
 */
private function incrStatsByContent( $subkey, Content $content ) {
	// Overall counter, kept for backwards compatibility
	$this->stats->increment( "editstash.{$subkey}" );

	$model = $content->getModel();
	$this->stats->increment( "editstash_by_model.{$model}.{$subkey}" );
}
328 
/**
 * Fetch the stash value for a key, waiting on an in-progress stash request if
 * the value is not there yet.
 *
 * @param string $key Stash key, also used as the lock name
 * @return stdClass|false Result of getStashValue(), possibly after waiting on the lock
 */
private function getAndWaitForStashValue( $key ) {
	$stashed = $this->getStashValue( $key );
	if ( $stashed ) {
		return $stashed;
	}

	$waitStart = microtime( true );
	// We ignore user aborts and keep parsing. Block on any prior parsing
	// so as to use its results and make use of the time spent parsing.
	// Skip this logic if there is no primary connection, in case this method
	// is called on an HTTP GET request for some reason.
	$primary = $this->lb->getAnyOpenConnection( $this->lb->getWriterIndex() );
	if ( $primary && $primary->lock( $key, __METHOD__, 30 ) ) {
		$stashed = $this->getStashValue( $key );
		$primary->unlock( $key, __METHOD__ );
	}

	$elapsedMs = 1000 * max( 0, microtime( true ) - $waitStart );
	$this->stats->timing( 'editstash.lock_wait_time', $elapsedMs );

	return $stashed;
}
354 
/**
 * Look up input text previously saved with stashInputText().
 *
 * @param string $textHash Hash that the text was stashed under
 * @return mixed Whatever the cache returns for the derived key
 */
public function fetchInputText( $textHash ) {
	return $this->cache->get(
		$this->cache->makeKey( 'stashedit', 'text', $textHash )
	);
}
364 
/**
 * Save input text in cache under a key derived from its hash, for at most
 * MAX_CACHE_TTL seconds.
 *
 * The write passes BagOStuff::WRITE_ALLOW_SEGMENTS, as storeStashValue() does
 * for stashed edit info; the original call ended in a dangling comma where this
 * argument belongs.
 * NOTE(review): assumes the flag lets large values be stored in segments and
 * that get() reassembles them transparently — confirm against BagOStuff.
 *
 * @param string $text Text to stash
 * @param string $textHash Hash used to build the cache key
 * @return bool Result of the cache write
 */
public function stashInputText( $text, $textHash ) {
	$textKey = $this->cache->makeKey( 'stashedit', 'text', $textHash );

	return $this->cache->set(
		$textKey,
		$text,
		self::MAX_CACHE_TTL,
		BagOStuff::WRITE_ALLOW_SEGMENTS
	);
}
380 
/**
 * Find the newest recentchanges timestamp attributed to the user's name,
 * read from a replica DB.
 *
 * @param UserIdentity $user
 * @return string|null TS_MW timestamp, or null when the query yields no value
 */
private function lastEditTime( UserIdentity $user ) {
	$replica = $this->lb->getConnectionRef( DB_REPLICA );

	$query = $replica->newSelectQueryBuilder()
		->select( 'MAX(rc_timestamp)' )
		->from( 'recentchanges' )
		->join( 'actor', null, 'actor_id=rc_actor' )
		->where( [ 'actor_name' => $user->getName() ] )
		->caller( __METHOD__ );
	$latest = $query->fetchField();

	return wfTimestampOrNull( TS_MW, $latest );
}
398 
/**
 * Get hash of the content, factoring in model/format.
 *
 * @param Content $content
 * @return string SHA-1 of the model, default format and serialized text,
 *  joined by newlines
 */
private function getContentHash( Content $content ) {
	$model = $content->getModel();
	$format = $content->getDefaultFormat();
	$blob = $content->serialize( $content->getDefaultFormat() );

	return sha1( "{$model}\n{$format}\n{$blob}" );
}
412 
/**
 * Get the temporary prepared edit stash key for a user.
 *
 * The key combines a hash of the page (namespace and DB key), the content
 * hash, and a hash of the user (ID and name).
 *
 * @param PageIdentity $page
 * @param string $contentHash Result of getContentHash()
 * @param UserIdentity $user
 * @return string Cache key
 */
private function getStashKey( PageIdentity $page, $contentHash, UserIdentity $user ) {
	$pageHash = md5( $page->getNamespace() . "\n" . $page->getDBkey() );
	// Account for user name related variables like signatures
	$userHash = md5( $user->getId() . "\n" . $user->getName() );

	// The content hash in the middle accounts for the edit model/text
	return $this->cache->makeKey( 'stashedit-info-v1', $pageHash, $contentHash, $userHash );
}
435 
/**
 * Read a stash entry from cache and check that it has the expected form.
 *
 * @param string $key Stash key
 * @return stdClass|false The cached object if it carries a ParserOutput in
 *  its `output` field, false otherwise
 */
private function getStashValue( $key ) {
	$cached = $this->cache->get( $key );
	$isUsable = is_object( $cached ) && $cached->output instanceof ParserOutput;

	return $isUsable ? $cached : false;
}
448 
/**
 * Build a value to store in cache based on the PST content and parser output,
 * and write it under the given key.
 *
 * @param string $key Stash key
 * @param Content $pstContent Pre-save transformed content
 * @param ParserOutput $parserOutput
 * @param string $timestamp Timestamp stored alongside the output
 * @param UserIdentity $user
 * @return string|true True on success, otherwise 'uncacheable' or 'store_error'
 */
private function storeStashValue(
	$key,
	Content $pstContent,
	ParserOutput $parserOutput,
	$timestamp,
	UserIdentity $user
) {
	// If an item is renewed, mind the cache TTL determined by config and parser functions.
	// Put an upper limit on the TTL to avoid extreme template/file staleness.
	$now = time();
	$age = $now - (int)wfTimestamp( TS_UNIX, $parserOutput->getCacheTime() );
	$ttlLimits = [ $parserOutput->getCacheExpiry() - $age, self::MAX_CACHE_TTL ];
	if ( $parserOutput->getFlag( 'user-signature' ) ) {
		// Avoid extremely stale user signature timestamps (T84843)
		$ttlLimits[] = self::MAX_SIGNATURE_TTL;
	}
	$ttl = min( $ttlLimits );

	if ( $ttl <= 0 ) {
		// Low TTL due to a tag, magic word, or signature?
		return 'uncacheable';
	}

	// Store what is actually needed and split the output into another key (T204742)
	$stashInfo = new stdClass();
	$stashInfo->pstContent = $pstContent;
	$stashInfo->output = $parserOutput;
	$stashInfo->timestamp = $timestamp;
	if ( $user->isRegistered() ) {
		$stashInfo->edits = $this->userEditTracker->getUserEditCount( $user );
	} else {
		$stashInfo->edits = null;
	}

	$stored = $this->cache->set( $key, $stashInfo, $ttl, BagOStuff::WRITE_ALLOW_SEGMENTS );
	if ( !$stored ) {
		return 'store_error';
	}

	// These blobs can waste slots in low cardinality memcached slabs
	$this->pruneExcessStashedEntries( $user, $key );

	return true;
}
497 
/**
 * Record a newly written stash key in the user's list of recent stash keys and
 * delete the oldest tracked entry once MAX_CACHE_RECENT keys are already held.
 *
 * @param UserIdentity $user
 * @param string $newKey Stash key that was just written
 */
private function pruneExcessStashedEntries( UserIdentity $user, $newKey ) {
	$key = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );

	$keyList = $this->cache->get( $key ) ?: [];
	// A re-stash reuses the same key. Forget any earlier occurrence first, so
	// that the value just written can never be picked as the "oldest" entry and
	// deleted below, and so that the list holds no duplicates.
	$keyList = array_values( array_diff( $keyList, [ $newKey ] ) );
	if ( count( $keyList ) >= self::MAX_CACHE_RECENT ) {
		$oldestKey = array_shift( $keyList );
		$this->cache->delete( $oldestKey, BagOStuff::WRITE_PRUNE_SEGMENTS );
	}

	$keyList[] = $newKey;
	$this->cache->set( $key, $keyList, 2 * self::MAX_CACHE_TTL );
}
514 
/**
 * Count the stash keys currently tracked for a user.
 *
 * @param UserIdentity $user
 * @return int Number of tracked keys; 0 when no list is cached
 */
private function recentStashEntryCount( UserIdentity $user ) {
	$listKey = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );
	$recentKeys = $this->cache->get( $listKey );

	return $recentKeys ? count( $recentKeys ) : 0;
}
524 }
MediaWiki\Storage\PageEditStash\stashInputText
stashInputText( $text, $textHash)
Definition: PageEditStash.php:370
Page\PageIdentity
Interface for objects (potentially) representing an editable wiki page.
Definition: PageIdentity.php:64
MediaWiki\Storage\PageEditStash\fetchInputText
fetchInputText( $textHash)
Definition: PageEditStash.php:359
CacheTime\getCacheExpiry
getCacheExpiry()
Returns the number of seconds after which this object should expire.
Definition: CacheTime.php:142
ParserOutput
Definition: ParserOutput.php:31
User\isAnon
isAnon()
Get whether the user is anonymous.
Definition: User.php:2985
MediaWiki\Storage\PageEditStash\ERROR_PARSE
const ERROR_PARSE
Definition: PageEditStash.php:67
User\isBot
isBot()
Definition: User.php:2993
MediaWiki\Storage\PageEditStash\__construct
__construct(BagOStuff $cache, ILoadBalancer $lb, LoggerInterface $logger, StatsdDataFactoryInterface $stats, UserEditTracker $userEditTracker, UserFactory $userFactory, HookContainer $hookContainer, $initiator)
Definition: PageEditStash.php:91
true
return true
Definition: router.php:90
MediaWiki\Storage\PageEditStash\getContentHash
getContentHash(Content $content)
Get hash of the content, factoring in model/format.
Definition: PageEditStash.php:405
wfTimestamp
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Definition: GlobalFunctions.php:1691
MediaWiki\Storage\PageEditStash\checkCache
checkCache(PageIdentity $page, Content $content, User $user)
Check that a prepared edit is in cache and still up-to-date.
Definition: PageEditStash.php:219
BagOStuff\WRITE_ALLOW_SEGMENTS
const WRITE_ALLOW_SEGMENTS
Definition: BagOStuff.php:120
MediaWiki\Storage\PageEditStash\incrStatsByContent
incrStatsByContent( $subkey, Content $content)
Definition: PageEditStash.php:324
WikiPage
Class representing a MediaWiki article and history.
Definition: WikiPage.php:60
MediaWiki\Storage\PageEditStash\ERROR_CACHE
const ERROR_CACHE
Definition: PageEditStash.php:68
BagOStuff
Class representing a cache/ephemeral data store.
Definition: BagOStuff.php:86
MediaWiki\Storage\PageEditStash\$userEditTracker
UserEditTracker $userEditTracker
Definition: PageEditStash.php:60
WikiPage\prepareContentForEdit
prepareContentForEdit(Content $content, RevisionRecord $revision=null, UserIdentity $user=null, $serialFormat=null, $useCache=true)
Prepare content which is about to be saved.
Definition: WikiPage.php:2055
MediaWiki\User\UserIdentity
Interface for objects representing user identity.
Definition: UserIdentity.php:39
MediaWiki\Storage\PageEditStash\$logger
LoggerInterface $logger
Definition: PageEditStash.php:54
User\getRequest
getRequest()
Get the WebRequest object to use with this object.
Definition: User.php:3074
MediaWiki\Storage\PageEditStash\$hookRunner
ParserOutputStashForEditHook $hookRunner
Definition: PageEditStash.php:58
MediaWiki\Storage\PageEditStash\PRESUME_FRESH_TTL_SEC
const PRESUME_FRESH_TTL_SEC
Definition: PageEditStash.php:72
MediaWiki\Storage\Hook\ParserOutputStashForEditHook
This is a hook handler interface, see docs/Hooks.md.
Definition: ParserOutputStashForEditHook.php:17
MediaWiki\Storage\PageEditStash
Class for managing stashed edits used by the page updater classes.
Definition: PageEditStash.php:48
MediaWiki\User\UserIdentity\isRegistered
isRegistered()
ParserOutput\getFlag
getFlag( $flag)
Definition: ParserOutput.php:1076
MediaWiki\Storage\PageEditStash\$userFactory
UserFactory $userFactory
Definition: PageEditStash.php:62
wfTimestampOrNull
wfTimestampOrNull( $outputtype=TS_UNIX, $ts=null)
Return a formatted timestamp, or null if input is null.
Definition: GlobalFunctions.php:1707
MediaWiki\User\UserIdentity\getName
getName()
MediaWiki\Storage\PageEditStash\getStashKey
getStashKey(PageIdentity $page, $contentHash, UserIdentity $user)
Get the temporary prepared edit stash key for a user.
Definition: PageEditStash.php:425
MediaWiki\Storage\PageEditStash\lastEditTime
lastEditTime(UserIdentity $user)
Definition: PageEditStash.php:385
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
MediaWiki\Storage\PageEditStash\$initiator
int $initiator
Definition: PageEditStash.php:64
BagOStuff\WRITE_PRUNE_SEGMENTS
const WRITE_PRUNE_SEGMENTS
Definition: BagOStuff.php:121
MediaWiki\Storage\PageEditStash\getAndWaitForStashValue
getAndWaitForStashValue( $key)
Definition: PageEditStash.php:333
$content
$content
Definition: router.php:76
DB_PRIMARY
const DB_PRIMARY
Definition: defines.php:27
MediaWiki\Storage\PageEditStash\storeStashValue
storeStashValue( $key, Content $pstContent, ParserOutput $parserOutput, $timestamp, UserIdentity $user)
Build a value to store in memcached based on the PST content and parser output.
Definition: PageEditStash.php:461
MediaWiki\Storage\PageEditStash\$stats
StatsdDataFactoryInterface $stats
Definition: PageEditStash.php:56
MediaWiki\Storage\PageEditStash\pruneExcessStashedEntries
pruneExcessStashedEntries(UserIdentity $user, $newKey)
Definition: PageEditStash.php:502
MediaWiki\Storage\PageEditStash\getStashValue
getStashValue( $key)
Definition: PageEditStash.php:440
MediaWiki\Storage
Definition: BlobAccessException.php:23
Content
Base interface for content objects.
Definition: Content.php:35
MediaWiki\Storage\PageEditStash\INITIATOR_USER
const INITIATOR_USER
Definition: PageEditStash.php:78
MediaWiki\User\UserEditTracker
Track info about user edit counts and timings.
Definition: UserEditTracker.php:21
MediaWiki\Storage\PageEditStash\ERROR_BUSY
const ERROR_BUSY
Definition: PageEditStash.php:70
MediaWiki\Storage\PageEditStash\ERROR_UNCACHEABLE
const ERROR_UNCACHEABLE
Definition: PageEditStash.php:69
MediaWiki\HookContainer\HookContainer
HookContainer class.
Definition: HookContainer.php:45
MediaWiki\Storage\PageEditStash\$lb
ILoadBalancer $lb
Definition: PageEditStash.php:52
MediaWiki\HookContainer\HookRunner
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Definition: HookRunner.php:552
MediaWiki\Storage\PageEditStash\recentStashEntryCount
recentStashEntryCount(UserIdentity $user)
Definition: PageEditStash.php:519
MediaWiki\Storage\PageEditStash\MAX_SIGNATURE_TTL
const MAX_SIGNATURE_TTL
Definition: PageEditStash.php:74
CacheTime\getCacheTime
getCacheTime()
Definition: CacheTime.php:65
User
The User object encapsulates all of the user-specific settings (user_id, name, rights,...
Definition: User.php:68
MediaWiki\Storage\PageEditStash\MAX_CACHE_RECENT
const MAX_CACHE_RECENT
Definition: PageEditStash.php:76
MediaWiki\Storage\PageEditStash\INITIATOR_JOB_OR_CLI
const INITIATOR_JOB_OR_CLI
Definition: PageEditStash.php:79
MediaWiki\Storage\PageEditStash\MAX_CACHE_TTL
const MAX_CACHE_TTL
Definition: PageEditStash.php:73
MediaWiki\Storage\PageEditStash\parseAndCache
parseAndCache(WikiPage $page, Content $content, UserIdentity $user, string $summary)
Definition: PageEditStash.php:118
User\getName
getName()
Get the user name, or the IP of an anonymous user.
Definition: User.php:2115
MediaWiki\User\UserFactory
Creates User objects.
Definition: UserFactory.php:41
Wikimedia\Rdbms\ILoadBalancer
Database cluster connection, tracking, load balancing, and transaction manager interface.
Definition: ILoadBalancer.php:81
MediaWiki\Storage\PageEditStash\$cache
BagOStuff $cache
Definition: PageEditStash.php:50
MediaWiki\Storage\PageEditStash\ERROR_NONE
const ERROR_NONE
Definition: PageEditStash.php:66