Translate extension for MediaWiki
 
Loading...
Searching...
No Matches
TranslatablePageMarker.php
1<?php
2declare( strict_types = 1 );
3
4namespace MediaWiki\Extension\Translate\PageTranslation;
5
6use ContentHandler;
7use JobQueueGroup;
8use LogicException;
9use MalformedTitleException;
10use ManualLogEntry;
11use MediaWiki\CommentStore\CommentStoreComment;
16use MediaWiki\Linker\LinkRenderer;
17use MediaWiki\Page\PageRecord;
18use MediaWiki\Page\WikiPageFactory;
19use MediaWiki\Permissions\Authority;
20use MediaWiki\Revision\SlotRecord;
21use MediaWiki\Status\Status;
22use MediaWiki\User\User;
23use MediaWiki\User\UserIdentity;
24use Message;
25use RecentChange;
26use TitleFormatter;
27use TitleParser;
28use Wikimedia\Rdbms\ILoadBalancer;
30
/** Syntax version written to the group's 'version' metadata on first mark, or when forced by the page settings. */
public const LATEST_SYNTAX_VERSION = '2';
/** NOTE(review): not referenced in this file; presumably the version assumed when no 'version' metadata is stored - confirm. */
public const DEFAULT_SYNTAX_VERSION = '1';

// MediaWiki core services
private ILoadBalancer $loadBalancer;
private JobQueueGroup $jobQueueGroup;
private LinkRenderer $linkRenderer;

// Message group services
private MessageGroups $messageGroups;
private MessageIndex $messageIndex;

// Title handling
private TitleFormatter $titleFormatter;
private TitleParser $titleParser;

// Translatable page services
private TranslatablePageParser $translatablePageParser;
private TranslatablePageStore $translatablePageStore;
private TranslatablePageStateStore $translatablePageStateStore;
private TranslationUnitStoreFactory $translationUnitStoreFactory;

private MessageGroupMetadata $messageGroupMetadata;
private WikiPageFactory $wikiPageFactory;
private TranslatablePageView $translatablePageView;
53
/**
 * Store the injected services on the instance.
 *
 * Each argument is assigned, unchanged, to the private property of the same name.
 */
public function __construct(
	ILoadBalancer $loadBalancer,
	JobQueueGroup $jobQueueGroup,
	LinkRenderer $linkRenderer,
	MessageGroups $messageGroups,
	MessageIndex $messageIndex,
	TitleFormatter $titleFormatter,
	TitleParser $titleParser,
	TranslatablePageParser $translatablePageParser,
	TranslatablePageStore $translatablePageStore,
	TranslatablePageStateStore $translatablePageStateStore,
	TranslationUnitStoreFactory $translationUnitStoreFactory,
	MessageGroupMetadata $messageGroupMetadata,
	WikiPageFactory $wikiPageFactory,
	TranslatablePageView $translatablePageView
) {
	// Assignments follow the parameter order above
	$this->loadBalancer = $loadBalancer;
	$this->jobQueueGroup = $jobQueueGroup;
	$this->linkRenderer = $linkRenderer;
	$this->messageGroups = $messageGroups;
	$this->messageIndex = $messageIndex;
	$this->titleFormatter = $titleFormatter;
	$this->titleParser = $titleParser;
	$this->translatablePageParser = $translatablePageParser;
	$this->translatablePageStore = $translatablePageStore;
	$this->translatablePageStateStore = $translatablePageStateStore;
	$this->translationUnitStoreFactory = $translationUnitStoreFactory;
	$this->messageGroupMetadata = $messageGroupMetadata;
	$this->wikiPageFactory = $wikiPageFactory;
	$this->translatablePageView = $translatablePageView;
}
85
/**
 * Remove a page from translation.
 *
 * When $removeMarkup is set, the page is first re-saved with the text returned by
 * TranslatablePage::getStrippedSourcePageText(). The page is then unmarked in the
 * translatable page store and a 'pagetranslation/unmark' log entry is inserted and published.
 *
 * @param TranslatablePage $page Page to unmark
 * @param User $user Performer of the edit and of the log entry
 * @param bool $removeMarkup Whether to save the stripped source text before unmarking
 * @throws TranslatablePageMarkException If saving the stripped text fails
 */
public function unmarkPage( TranslatablePage $page, User $user, bool $removeMarkup ): void {
	$pageIdentity = $page->getPageIdentity();

	if ( $removeMarkup ) {
		$strippedContent = ContentHandler::makeContent(
			$page->getStrippedSourcePageText(),
			$page->getTitle()
		);
		$wikiPage = $this->wikiPageFactory->newFromTitle( $pageIdentity );
		$editSummary = Message::newFromKey( 'tpt-unlink-summary' )->inContentLanguage()->text();

		$editStatus = $wikiPage->doUserEditContent(
			$strippedContent,
			$user,
			$editSummary,
			EDIT_FORCE_BOT | EDIT_UPDATE
		);

		// Do not unmark if the markup could not be removed
		if ( !$editStatus->isOK() ) {
			throw new TranslatablePageMarkException( [ 'tpt-edit-failed', $editStatus->getWikiText() ] );
		}
	}

	$this->translatablePageStore->unmark( $pageIdentity );

	$logEntry = new ManualLogEntry( 'pagetranslation', 'unmark' );
	$logEntry->setPerformer( $user );
	$logEntry->setTarget( $pageIdentity );
	$logEntry->publish( $logEntry->insert() );
}
120
/**
 * Parse the given page and create a TranslatablePageMarkOperation for the given revision.
 *
 * The operation is only created when the revision is the latest revision of the page and
 * the page's ready tag points at that revision.
 *
 * @param PageRecord $page Page to be marked
 * @param int|null $revision Revision to mark; null means the latest revision of $page
 * @param bool $validateUnitTitle Whether the page display title unit takes part in the
 *  unit title validation
 * @return TranslatablePageMarkOperation
 * @throws TranslatablePageMarkException If $revision is not the latest revision
 *  ('tpt-oldrevision') or the ready tag does not match it ('tpt-notsuitable')
 */
public function getMarkOperation(
	PageRecord $page,
	?int $revision,
	bool $validateUnitTitle
): TranslatablePageMarkOperation {
	$latestRevID = $page->getLatest();
	if ( $revision === null ) {
		// Get the latest revision
		$revision = $latestRevID;
	}

	// This also catches the case where revision does not belong to the title
	if ( $revision !== $latestRevID ) {
		// We do want to notify the reviewer if the underlying page changes during review
		$link = $this->linkRenderer->makeKnownLink(
			$page,
			(string)$revision,
			[],
			[ 'oldid' => (string)$revision ]
		);
		throw new TranslatablePageMarkException( [
			'tpt-oldrevision',
			$this->titleFormatter->getPrefixedText( $page ),
			Message::rawParam( $link )
		] );
	}

	// newFromRevision never fails, but getReadyTag might fail if revision does not belong
	// to the page (checked above)
	$translatablePage = TranslatablePage::newFromRevision( $page, $revision );
	if ( $translatablePage->getReadyTag() !== $latestRevID ) {
		throw new TranslatablePageMarkException( [
			'tpt-notsuitable',
			$this->titleFormatter->getPrefixedText( $page ),
			Message::plaintextParam( '<translate>' )
		] );
	}

	$parserOutput = $this->translatablePageParser->parse( $translatablePage->getText() );
	[ $units, $deletedUnits ] = $this->prepareTranslationUnits( $translatablePage, $parserOutput );

	$unitValidationStatus = $this->validateUnitNames(
		$translatablePage,
		$units,
		$validateUnitTitle
	);

	return new TranslatablePageMarkOperation(
		$translatablePage,
		$parserOutput,
		$units,
		$deletedUnits,
		// First mark: the page has no marked tag yet
		$translatablePage->getMarkedTag() === null,
		$unitValidationStatus
	);
}
187
/**
 * Validate the identifiers of the given translation units.
 *
 * For every unit (optionally skipping the page display title unit) the longest expected
 * translation unit title is run through the title parser and the id is checked against
 * TranslationUnit::UNIT_MARKER_INVALID_CHARS. Duplicate ids are always reported.
 *
 * @param TranslatablePage $page Page the units belong to
 * @param TranslationUnit[] $units Units keyed by unit id
 * @param bool $includePageDisplayTitle Whether the display title unit gets the title checks too
 * @return Status Merged status of all units; fatal messages used are
 *  'tpt-unit-title-too-long', 'tpt-unit-title-invalid', 'tpt-invalid' and 'tpt-duplicate'
 */
private function validateUnitNames(
	TranslatablePage $page,
	array $units,
	bool $includePageDisplayTitle
): Status {
	$invalidCharsPattern = '~[' . preg_quote( TranslationUnit::UNIT_MARKER_INVALID_CHARS, '~' ) . ']~';
	$seenIds = [];
	$overallStatus = Status::newGood();

	foreach ( $units as $unitKey => $unit ) {
		$unitStatus = Status::newGood();
		$isExemptDisplayTitle = !$includePageDisplayTitle
			&& $unitKey === TranslatablePage::DISPLAY_TITLE_UNIT_ID;

		if ( !$isExemptDisplayTitle ) {
			// xx-yyyyyyyyyy represents a long language code. 2 more characters than nl-informal which
			// is the longest non-redirect language code in language-data
			$pageTitle = $this->titleFormatter->getPrefixedText( $page->getPageIdentity() );
			$longestUnitTitle = "Translations:$pageTitle/{$unit->id}/xx-yyyyyyyyyy";

			try {
				$this->titleParser->parseTitle( $longestUnitTitle );
			} catch ( MalformedTitleException $e ) {
				if ( $e->getErrorMessage() !== 'title-invalid-too-long' ) {
					$unitStatus->fatal( 'tpt-unit-title-invalid', $unit->id, $e->getMessageObject() );
				} else {
					$unitStatus->fatal(
						'tpt-unit-title-too-long',
						$unit->id,
						Message::numParam( strlen( $longestUnitTitle ) ),
						$e->getErrorMessageParameters()[ 0 ],
						$pageTitle
					);
				}
			}

			// Only perform custom validation if the TitleParser validation passed
			if ( $unitStatus->isGood() && preg_match( $invalidCharsPattern, $unit->id ) ) {
				$unitStatus->fatal( 'tpt-invalid', $unit->id );
			}
		}

		// We need to do checks for both new and existing units. Someone might have tampered with the
		// page source adding duplicate or invalid markers.
		if ( isset( $seenIds[$unit->id] ) ) {
			// If the same ID is used three or more times, the same
			// error will be added more than once, but that's okay,
			// Status::fatal will deduplicate
			$unitStatus->fatal( 'tpt-duplicate', $unit->id );
		}
		$seenIds[$unit->id] = true;

		$overallStatus->merge( $unitStatus );
	}

	return $overallStatus;
}
250
/**
 * This function does the heavy duty of marking a page.
 *
 * It saves the page source with updated section markers, replaces the page's rows in
 * translate_sections, stores group metadata, adds the marked tag, refreshes message group
 * caches, queues an UpdateTranslatablePageJob and writes a 'pagetranslation/mark' log entry.
 *
 * @param TranslatablePageMarkOperation $operation Validated operation to carry out
 * @param TranslatablePageSettings $pageSettings User configurable settings for the marking
 * @param User $user Performer of the edit and of the log entries
 * @return int Number of translation units that were stored
 * @throws LogicException If $operation is not valid
 * @throws TranslatablePageMarkException If saving the updated page source fails
 */
public function markForTranslation(
	TranslatablePageMarkOperation $operation,
	TranslatablePageSettings $pageSettings,
	User $user
): int {
	if ( !$operation->isValid() ) {
		throw new LogicException( 'Trying to mark a page for translation that is not valid' );
	}

	$page = $operation->getPage();
	$newRevisionId = $this->updateSectionMarkers( $page, $user, $operation );
	// Probably a no-change edit, so no new revision was assigned. Get the latest revision manually
	// Could also occur on the off chance $newRevisionRecord->getId() returns null
	$newRevisionId ??= $page->getTitle()->getLatestRevID();

	$inserts = [];
	$changed = [];
	$groupId = $page->getMessageGroupId();
	$maxId = (int)$this->messageGroupMetadata->get( $groupId, 'maxid' );

	$pageId = $page->getTitle()->getArticleID();
	// Leave out the display title unit when the title is not to be translated
	$sections = $pageSettings->shouldTranslateTitle()
		? $operation->getUnits()
		: array_filter(
			$operation->getUnits(),
			static fn ( TranslationUnit $s ) => $s->id !== TranslatablePage::DISPLAY_TITLE_UNIT_ID
		);

	// array_values: trs_order must be a gapless position even after filtering
	foreach ( array_values( $sections ) as $index => $s ) {
		$maxId = max( $maxId, (int)$s->id );
		$changed[] = $s->id;

		if ( in_array( $s->id, $pageSettings->getNoFuzzyUnits(), true ) ) {
			// UpdateTranslatablePageJob will only fuzzy when type is changed
			$s->type = 'old';
		}

		$inserts[] = [
			'trs_page' => $pageId,
			'trs_key' => $s->id,
			'trs_text' => $s->getText(),
			'trs_order' => $index
		];
	}

	// Replace all stored sections of this page with the new set
	$dbw = $this->loadBalancer->getConnection( DB_PRIMARY );
	$dbw->delete(
		'translate_sections',
		[ 'trs_page' => $pageId ],
		__METHOD__
	);
	$dbw->insert( 'translate_sections', $inserts, __METHOD__ );

	$this->saveMetadata( $operation, $pageSettings, $maxId, $user );

	$page->addMarkedTag( $newRevisionId );

	if ( $this->translatablePageView->isTranslationBannerNamespaceConfigured() ) {
		$this->translatablePageStateStore->remove( $page->getPageIdentity() );
	}

	// TODO: Ideally we would only invalidate translatable page message group cache
	$this->messageGroups->recache();

	$group = new WikiPageMessageGroup( $groupId, $page->getTitle() );
	$newKeys = $group->makeGroupKeys( $changed );
	// Interim cache is temporary cache to make new message groups keys known
	// until MessageIndex is rebuilt (which can take a long time)
	$this->messageIndex->storeInterim( $group, $newKeys );

	$job = UpdateTranslatablePageJob::newFromPage( $page, $sections );
	$this->jobQueueGroup->push( $job );

	// Logging
	$entry = new ManualLogEntry( 'pagetranslation', 'mark' );
	$entry->setPerformer( $user );
	$entry->setTarget( $page->getTitle() );
	$entry->setParameters( [
		'revision' => $newRevisionId,
		'changed' => count( $changed ),
	] );
	$logId = $entry->insert();
	$entry->publish( $logId );

	// Clear more caches
	$page->getTitle()->invalidateCache();

	return count( $sections );
}
356
/**
 * Persist the message group metadata that results from marking a page.
 *
 * Writes 'maxid' and 'transclusion', writes 'version' when this is the first mark or the
 * settings force the latest syntax version, and then updates the priority language settings.
 *
 * @param TranslatablePageMarkOperation $operation
 * @param TranslatablePageSettings $pageSettings
 * @param int $maxId Highest unit id in use, stored as a string
 * @param UserIdentity $user Performer for the priority language log entry
 */
private function saveMetadata(
	TranslatablePageMarkOperation $operation,
	TranslatablePageSettings $pageSettings,
	int $maxId,
	UserIdentity $user
): void {
	$page = $operation->getPage();
	$groupId = $page->getMessageGroupId();
	$metadata = $this->messageGroupMetadata;

	$metadata->set( $groupId, 'maxid', (string)$maxId );

	$useLatestSyntax = $pageSettings->shouldForceLatestSyntaxVersion() || $operation->isFirstMark();
	if ( $useLatestSyntax ) {
		$metadata->set( $groupId, 'version', self::LATEST_SYNTAX_VERSION );
	}

	$transclusionFlag = $pageSettings->shouldEnableTransclusion() ? '1' : '0';
	$metadata->set( $groupId, 'transclusion', $transclusionFlag );

	$this->handlePriorityLanguages( $page, $pageSettings, $user );
}
379
/**
 * Store the priority language settings of the page's message group and log any change.
 *
 * The 'prioritylangs', 'priorityforce' and 'priorityreason' metadata values are always
 * rewritten. A 'pagetranslation/prioritylanguages' log entry is only created when one of
 * them differs from the previously stored value.
 *
 * @param TranslatablePage $page
 * @param TranslatablePageSettings $pageSettings
 * @param UserIdentity $user Performer of the log entry
 */
private function handlePriorityLanguages(
	TranslatablePage $page,
	TranslatablePageSettings $pageSettings,
	UserIdentity $user
): void {
	$priorityLanguages = $pageSettings->getPriorityLanguages();
	$languages = $priorityLanguages ? implode( ',', $priorityLanguages ) : false;

	$force = false;
	if ( $pageSettings->shouldForcePriorityLanguage() ) {
		$force = 'on';
	}

	// We use the reason if priority force and / or priority languages are set
	// Otherwise just a reason doesn't make sense
	$reason = false;
	if ( $languages || $force ) {
		$comment = $pageSettings->getPriorityLanguageComment();
		if ( $comment !== '' ) {
			$reason = $comment;
		}
	}

	$groupId = $page->getMessageGroupId();
	$metadata = $this->messageGroupMetadata;

	// Read the old metadata before overwriting it
	$opLanguages = $metadata->get( $groupId, 'prioritylangs' );
	$opForce = $metadata->get( $groupId, 'priorityforce' );
	$opReason = $metadata->get( $groupId, 'priorityreason' );

	$metadata->set( $groupId, 'prioritylangs', $languages );
	$metadata->set( $groupId, 'priorityforce', $force );
	$metadata->set( $groupId, 'priorityreason', $reason );

	$languagesChanged = $opLanguages !== $languages;
	// Since 2024.04, we started storing false instead of 'off' to avoid additional storage
	// Remove after 2024.07 MLEB release
	$forceChanged = $opForce !== $force && ( $force !== false || $opForce !== 'off' );
	// Since 2024.04, empty reason values are no longer stored.
	// Remove casting to string after 2024.07 MLEB release
	$reasonChanged = (string)$opReason !== (string)$reason;

	if ( !$languagesChanged && !$forceChanged && !$reasonChanged ) {
		// Nothing changed, nothing to log
		return;
	}

	$logEntry = new ManualLogEntry( 'pagetranslation', 'prioritylanguages' );
	$logEntry->setPerformer( $user );
	$logEntry->setTarget( $page->getTitle() );
	$logEntry->setParameters( [
		'languages' => $languages,
		'force' => $force,
		'reason' => $reason,
	] );
	$logEntry->setComment( $reason === false ? '' : $reason );
	$logEntry->publish( $logEntry->insert() );
}
434
/**
 * Build the list of translation units for a page and classify them against the stored units.
 *
 * The display title unit is prepended to the parsed units. Units carrying
 * TranslationUnit::NEW_UNIT_ID get type 'new' and a fresh numeric id above every id seen so
 * far; units whose text differs from the stored text get type 'changed' (with oldText set);
 * all others get type 'old'.
 *
 * @param TranslatablePage $page
 * @param ParserOutput $parserOutput Parse result of the page text
 * @return array Two elements: the units keyed as returned by the parser (plus the display
 *  title unit), and the stored units that are no longer present
 */
private function prepareTranslationUnits( TranslatablePage $page, ParserOutput $parserOutput ): array {
	$reader = $this->translationUnitStoreFactory->getReader( $page->getPageIdentity() );
	$storedUnits = $reader->getUnits();

	// Prepend the display title unit, which is not part of the page contents
	$titleUnit = new TranslationUnit(
		$this->titleFormatter->getPrefixedText( $page->getPageIdentity() ),
		TranslatablePage::DISPLAY_TITLE_UNIT_ID
	);
	$units = [ TranslatablePage::DISPLAY_TITLE_UNIT_ID => $titleUnit ] + $parserOutput->units();

	// Figure out the largest used translation unit id
	$highest = (int)$this->messageGroupMetadata->get( $page->getMessageGroupId(), 'maxid' );
	foreach ( $storedUnits as $storedId => $unused ) {
		$highest = max( $highest, (int)$storedId );
	}
	foreach ( $units as $unit ) {
		$highest = max( $highest, (int)$unit->id );
	}

	// Classify each unit; whatever remains in $deletedUnits no longer exists on the page
	$deletedUnits = $storedUnits;
	foreach ( $units as $unit ) {
		if ( $unit->id === TranslationUnit::NEW_UNIT_ID ) {
			$unit->type = 'new';
			$unit->id = (string)( ++$highest );
		} else {
			$unit->type = 'old';
			$stored = $storedUnits[$unit->id] ?? null;
			if ( $stored !== null && $unit->text !== $stored->text ) {
				$unit->type = 'changed';
				$unit->oldText = $stored->text;
			}
		}

		unset( $deletedUnits[$unit->id] );
	}

	return [ $units, $deletedUnits ];
}
482
/**
 * Save the page source produced by the parser (with section markers) as a new revision.
 *
 * The edit is flagged as autopatrolled when $authority is authorized for 'autopatrol'
 * on the page.
 *
 * @param TranslatablePage $page
 * @param Authority $authority Performer of the edit
 * @param TranslatablePageMarkOperation $operation Source of the text to save
 * @return int|null Id of the new revision, or null when saveRevision() returned no revision
 * @throws TranslatablePageMarkException If the page updater status is not OK
 */
private function updateSectionMarkers(
	TranslatablePage $page,
	Authority $authority,
	TranslatablePageMarkOperation $operation
): ?int {
	$title = $page->getTitle();
	$updater = $this->wikiPageFactory->newFromTitle( $title )->newPageUpdater( $authority );

	$sourceText = $operation->getParserOutput()->sourcePageTextForSaving();
	$updater->setContent( SlotRecord::MAIN, ContentHandler::makeContent( $sourceText, $title ) );

	if ( $authority->authorizeWrite( 'autopatrol', $title ) ) {
		$updater->setRcPatrolStatus( RecentChange::PRC_AUTOPATROLLED );
	}

	$summary = CommentStoreComment::newUnsavedComment(
		Message::newFromKey( 'tpt-mark-summary' )->inContentLanguage()->text()
	);
	$savedRevision = $updater->saveRevision( $summary, EDIT_FORCE_BOT | EDIT_UPDATE );

	$saveStatus = $updater->getStatus();
	if ( !$saveStatus->isOK() ) {
		throw new TranslatablePageMarkException( [ 'tpt-edit-failed', $saveStatus->getMessage() ] );
	}

	if ( $savedRevision === null ) {
		return null;
	}

	return $savedRevision->getId();
}
510}
Factory class for accessing message groups individually by id or all of them as a list.
Creates a database of keys in all groups, so that namespace and key can be used to get the groups the...
Offers functionality for reading and updating Translate group related metadata.
Exception thrown when TranslatablePageMarker is unable to unmark a page for translation.
This class encapsulates the information / state needed to mark a page for translation.
Service to mark/unmark pages from translation and perform related validations.
getMarkOperation(PageRecord $page, ?int $revision, bool $validateUnitTitle)
Parse the given page and create a new MarkPageOperation with the page and the given revision if the r...
unmarkPage(TranslatablePage $page, User $user, bool $removeMarkup)
Remove a page from translation.
markForTranslation(TranslatablePageMarkOperation $operation, TranslatablePageSettings $pageSettings, User $user)
This function does the heavy duty of marking a page.
Generates ParserOutput from text or removes all tags from a text.
Value object containing user configurable settings when marking a page for translation.
Logic and code to generate various aspects related to how translatable pages are displayed.
Mixed bag of methods related to translatable pages.
static newFromRevision(PageIdentity $title, int $revision)
Constructs a translatable page from given revision.
This class represents one translation unit in a translatable page.
Wraps the translatable page sections into a message group.