Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
96.54% covered (success)
96.54%
223 / 231
55.56% covered (warning)
55.56%
5 / 9
CRAP
0.00% covered (danger)
0.00%
0 / 1
TranslatablePageMarker
96.54% covered (success)
96.54%
223 / 231
55.56% covered (warning)
55.56%
5 / 9
47
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
13 / 13
100.00% covered (success)
100.00%
1 / 1
1
 unmarkPage
94.74% covered (success)
94.74%
18 / 19
0.00% covered (danger)
0.00%
0 / 1
3.00
 getMarkOperation
100.00% covered (success)
100.00%
37 / 37
100.00% covered (success)
100.00%
1 / 1
4
 validateUnitNames
100.00% covered (success)
100.00%
26 / 26
100.00% covered (success)
100.00%
1 / 1
9
 markForTranslation
96.23% covered (success)
96.23%
51 / 53
0.00% covered (danger)
0.00%
0 / 1
5
 saveMetadata
100.00% covered (success)
100.00%
11 / 11
100.00% covered (success)
100.00%
1 / 1
4
 handlePriorityLanguages
86.67% covered (warning)
86.67%
26 / 30
0.00% covered (danger)
0.00%
0 / 1
9.19
 prepareTranslationUnits
100.00% covered (success)
100.00%
26 / 26
100.00% covered (success)
100.00%
1 / 1
8
 updateSectionMarkers
93.75% covered (success)
93.75%
15 / 16
0.00% covered (danger)
0.00%
0 / 1
4.00
1<?php
2declare( strict_types = 1 );
3
4namespace MediaWiki\Extension\Translate\PageTranslation;
5
6use ContentHandler;
7use JobQueueGroup;
8use LogicException;
9use MalformedTitleException;
10use ManualLogEntry;
11use MediaWiki\CommentStore\CommentStoreComment;
12use MediaWiki\Extension\Translate\MessageGroupProcessing\MessageGroups;
13use MediaWiki\Extension\Translate\MessageGroupProcessing\TranslatablePageStore;
14use MediaWiki\Extension\Translate\MessageLoading\MessageIndex;
15use MediaWiki\Extension\Translate\MessageProcessing\MessageGroupMetadata;
16use MediaWiki\Languages\LanguageNameUtils;
17use MediaWiki\Linker\LinkRenderer;
18use MediaWiki\Page\PageRecord;
19use MediaWiki\Page\WikiPageFactory;
20use MediaWiki\Permissions\Authority;
21use MediaWiki\Revision\SlotRecord;
22use MediaWiki\User\UserIdentity;
23use Message;
24use RecentChange;
25use Status;
26use TitleFormatter;
27use TitleParser;
28use User;
29use Wikimedia\Rdbms\ILoadBalancer;
30
31/**
32 * Service to mark pages for translation and to unmark them
33 * @since 2023.10
34 */
35class TranslatablePageMarker {
36    public const LATEST_SYNTAX_VERSION = '2';
37    public const DEFAULT_SYNTAX_VERSION = '1';
38
39    private ILoadBalancer $loadBalancer;
40    private JobQueueGroup $jobQueueGroup;
41    private LanguageNameUtils $languageNameUtils;
42    private LinkRenderer $linkRenderer;
43    private MessageGroups $messageGroups;
44    private MessageIndex $messageIndex;
45    private TitleFormatter $titleFormatter;
46    private TitleParser $titleParser;
47    private TranslatablePageParser $translatablePageParser;
48    private TranslatablePageStore $translatablePageStore;
49    private TranslationUnitStoreFactory $translationUnitStoreFactory;
50    private MessageGroupMetadata $messageGroupMetadata;
51    private WikiPageFactory $wikiPageFactory;
52
53    public function __construct(
54        ILoadBalancer $loadBalancer,
55        JobQueueGroup $jobQueueGroup,
56        LanguageNameUtils $languageNameUtils,
57        LinkRenderer $linkRenderer,
58        MessageGroups $messageGroups,
59        MessageIndex $messageIndex,
60        TitleFormatter $titleFormatter,
61        TitleParser $titleParser,
62        TranslatablePageParser $translatablePageParser,
63        TranslatablePageStore $translatablePageStore,
64        TranslationUnitStoreFactory $translationUnitStoreFactory,
65        MessageGroupMetadata $messageGroupMetadata,
66        WikiPageFactory $wikiPageFactory
67    ) {
68        $this->loadBalancer = $loadBalancer;
69        $this->jobQueueGroup = $jobQueueGroup;
70        $this->languageNameUtils = $languageNameUtils;
71        $this->linkRenderer = $linkRenderer;
72        $this->messageIndex = $messageIndex;
73        $this->titleFormatter = $titleFormatter;
74        $this->titleParser = $titleParser;
75        $this->translatablePageParser = $translatablePageParser;
76        $this->translatablePageStore = $translatablePageStore;
77        $this->translationUnitStoreFactory = $translationUnitStoreFactory;
78        $this->wikiPageFactory = $wikiPageFactory;
79        $this->messageGroups = $messageGroups;
80        $this->messageGroupMetadata = $messageGroupMetadata;
81    }
82
83    /**
84     * Remove a page from translation.
85     * @param TranslatablePage $page The page to remove from translation
86     * @param User $user The user performing the action
87     * @param bool $removeMarkup Whether to remove markup from the translation page
88     * @throws TranslatablePageMarkException If removing the markup from the translation page fails
89     */
90    public function unmarkPage( TranslatablePage $page, User $user, bool $removeMarkup ): void {
91        if ( $removeMarkup ) {
92            $content = ContentHandler::makeContent(
93                $page->getStrippedSourcePageText(),
94                $page->getTitle()
95            );
96
97            $status = $this->wikiPageFactory->newFromTitle( $page->getPageIdentity() )->doUserEditContent(
98                $content,
99                $user,
100                Message::newFromKey( 'tpt-unlink-summary' )->inContentLanguage()->text(),
101                EDIT_FORCE_BOT | EDIT_UPDATE
102            );
103
104            if ( !$status->isOK() ) {
105                throw new TranslatablePageMarkException( [ 'tpt-edit-failed', $status->getWikiText() ] );
106            }
107        }
108
109        $this->translatablePageStore->unmark( $page->getPageIdentity() );
110
111        $entry = new ManualLogEntry( 'pagetranslation', 'unmark' );
112        $entry->setPerformer( $user );
113        $entry->setTarget( $page->getPageIdentity() );
114        $logId = $entry->insert();
115        $entry->publish( $logId );
116    }
117
118    /**
119     * Parse the given page and create a new MarkPageOperation with the page and the given revision
120     * if the revision is latest and that latest revision is ready to be marked.
121     * @param PageRecord $page
122     * @param ?int $revision Revision to use, or null to use the latest
123     *  revision of the given page (i.e. not do the latest revision check)
124     * @throws TranslatablePageMarkException If the revision was provided and was
125     *  non-latest, or if the latest revision of the page is not ready to be marked
126     * @throws ParsingFailure If the parse fails
127     */
128    public function getMarkOperation(
129        PageRecord $page,
130        ?int $revision,
131        bool $validateUnitTitle
132    ): TranslatablePageMarkOperation {
133        $latestRevID = $page->getLatest();
134        if ( $revision === null ) {
135            // Get the latest revision
136            $revision = $latestRevID;
137        }
138
139        // This also catches the case where revision does not belong to the title
140        if ( $revision !== $latestRevID ) {
141            // We do want to notify the reviewer if the underlying page changes during review
142            $link = $this->linkRenderer->makeKnownLink(
143                $page,
144                (string)$revision,
145                [],
146                [ 'oldid' => (string)$revision ]
147            );
148            throw new TranslatablePageMarkException( [
149                'tpt-oldrevision',
150                $this->titleFormatter->getPrefixedText( $page ),
151                Message::rawParam( $link )
152            ] );
153        }
154
155        // newFromRevision never fails, but getReadyTag might fail if revision does not belong
156        // to the page (checked above)
157        $translatablePage = TranslatablePage::newFromRevision( $page, $revision );
158        if ( $translatablePage->getReadyTag() !== $latestRevID ) {
159            throw new TranslatablePageMarkException( [
160                'tpt-notsuitable',
161                $this->titleFormatter->getPrefixedText( $page ),
162                Message::plaintextParam( '<translate>' )
163            ] );
164        }
165
166        $parserOutput = $this->translatablePageParser->parse( $translatablePage->getText() );
167        [ $units, $deletedUnits ] = $this->prepareTranslationUnits( $translatablePage, $parserOutput );
168
169        $unitValidationStatus = $this->validateUnitNames(
170            $translatablePage,
171            $units,
172            $validateUnitTitle
173        );
174
175        return new TranslatablePageMarkOperation(
176            $translatablePage,
177            $parserOutput,
178            $units,
179            $deletedUnits,
180            $translatablePage->getMarkedTag() === null,
181            $unitValidationStatus
182        );
183    }
184
185    /**
186     * Validate translation unit names.
187     * @param TranslatablePage $page
188     * @param TranslationUnit[] $units
189     * @param bool $includePageDisplayTitle Whether to validate the page display title as
190     * well (notably, it could fail the length validation). Duplicate ID check will be performed
191     * on the page display title even if this is false, as reusing the page display title unit name
192     * for a normal unit is an error for that unit.
193     * @return Status If OK, returns the validated units as a value in the Status object
194     */
195    private function validateUnitNames(
196        TranslatablePage $page,
197        array $units,
198        bool $includePageDisplayTitle
199    ): Status {
200        $usedNames = [];
201        $status = Status::newGood();
202        $ic = preg_quote( TranslationUnit::UNIT_MARKER_INVALID_CHARS, '~' );
203        foreach ( $units as $key => $s ) {
204            $unitStatus = Status::newGood();
205            if ( $includePageDisplayTitle || $key !== TranslatablePage::DISPLAY_TITLE_UNIT_ID ) {
206                // xx-yyyyyyyyyy represents a long language code. 2 more characters than nl-informal which
207                // is the longest non-redirect language code in language-data
208                $pageTitle = $this->titleFormatter->getPrefixedText( $page->getPageIdentity() );
209                $longestUnitTitle = "Translations:$pageTitle/{$s->id}/xx-yyyyyyyyyy";
210                try {
211                    $this->titleParser->parseTitle( $longestUnitTitle );
212                } catch ( MalformedTitleException $e ) {
213                    if ( $e->getErrorMessage() === 'title-invalid-too-long' ) {
214                        $unitStatus->fatal(
215                            'tpt-unit-title-too-long',
216                            $s->id,
217                            Message::numParam( strlen( $longestUnitTitle ) ),
218                            $e->getErrorMessageParameters()[ 0 ],
219                            $pageTitle
220                        );
221                    } else {
222                        $unitStatus->fatal( 'tpt-unit-title-invalid', $s->id, $e->getMessageObject() );
223                    }
224                }
225
226                // Only perform custom validation if the TitleParser validation passed
227                if ( $unitStatus->isGood() && preg_match( "~[$ic]~", $s->id ) ) {
228                    $unitStatus->fatal( 'tpt-invalid', $s->id );
229                }
230            }
231
232            // We need to do checks for both new and existing units. Someone might have tampered with the
233            // page source adding duplicate or invalid markers.
234            if ( isset( $usedNames[$s->id] ) ) {
235                // If the same ID is used three or more times, the same
236                // error will be added more than once, but that's okay,
237                // Status::fatal will deduplicate
238                $unitStatus->fatal( 'tpt-duplicate', $s->id );
239            }
240            $usedNames[$s->id] = true;
241
242            $status->merge( $unitStatus );
243        }
244
245        return $status;
246    }
247
248    /**
249     * This function does the heavy duty of marking a page.
250     * - Updates the source page with section markers.
251     * - Updates translate_sections table
252     * - Updates revtags table
253     * - Sets up renderjobs to update the translation pages
254     * - Invalidates caches
255     * - Adds interim cache for MessageIndex
256     *
257     * @param TranslatablePageMarkOperation $operation
258     * @param TranslatablePageSettings $pageSettings Contains information about priority languages, units that should
259     * not be fuzzed, whether title should be translated and other translatable page settings
260     * @param User $user User performing the action. Checking user
261     * permissions is the caller’s responsibility
262     * @return int The number of translation units actually used
263     */
264    public function markForTranslation(
265        TranslatablePageMarkOperation $operation,
266        TranslatablePageSettings $pageSettings,
267        User $user
268    ): int {
269        if ( !$operation->isValid() ) {
270            throw new LogicException( 'Trying to mark a page for translation that is not valid' );
271        }
272
273        $page = $operation->getPage();
274        $newRevisionId = $this->updateSectionMarkers( $page, $user, $operation );
275        // Probably a no-change edit, so no new revision was assigned. Get the latest revision manually
276        // Could also occur on the off chance $newRevisionRecord->getId() returns null
277        $newRevisionId ??= $page->getTitle()->getLatestRevID();
278
279        $inserts = [];
280        $changed = [];
281        $groupId = $page->getMessageGroupId();
282        $maxId = (int)$this->messageGroupMetadata->get( $groupId, 'maxid' );
283
284        $pageId = $page->getTitle()->getArticleID();
285        $sections = $pageSettings->shouldTranslateTitle()
286            ? $operation->getUnits()
287            : array_filter(
288                $operation->getUnits(),
289                static fn ( TranslationUnit $s ) => $s->id !== TranslatablePage::DISPLAY_TITLE_UNIT_ID
290            );
291
292        foreach ( array_values( $sections ) as $index => $s ) {
293            $maxId = max( $maxId, (int)$s->id );
294            $changed[] = $s->id;
295
296            if ( in_array( $s->id, $pageSettings->getNoFuzzyUnits(), true ) ) {
297                // UpdateTranslatablePageJob will only fuzzy when type is changed
298                $s->type = 'old';
299            }
300
301            $inserts[] = [
302                'trs_page' => $pageId,
303                'trs_key' => $s->id,
304                'trs_text' => $s->getText(),
305                'trs_order' => $index
306            ];
307        }
308
309        $dbw = $this->loadBalancer->getConnection( DB_PRIMARY );
310        $dbw->delete(
311            'translate_sections',
312            [ 'trs_page' => $page->getTitle()->getArticleID() ],
313            __METHOD__
314        );
315        $dbw->insert( 'translate_sections', $inserts, __METHOD__ );
316
317        $this->saveMetadata( $operation, $pageSettings, $maxId, $user );
318
319        $page->addMarkedTag( $newRevisionId );
320        $this->messageGroups->recache();
321
322        // Store interim cache
323        $group = $page->getMessageGroup();
324        $newKeys = $group->makeGroupKeys( $changed );
325        $this->messageIndex->storeInterim( $group, $newKeys );
326
327        $job = UpdateTranslatablePageJob::newFromPage( $page, $sections );
328        $this->jobQueueGroup->push( $job );
329
330        // Logging
331        $entry = new ManualLogEntry( 'pagetranslation', 'mark' );
332        $entry->setPerformer( $user );
333        $entry->setTarget( $page->getTitle() );
334        $entry->setParameters( [
335            'revision' => $newRevisionId,
336            'changed' => count( $changed ),
337        ] );
338        $logId = $entry->insert();
339        $entry->publish( $logId );
340
341        // Clear more caches
342        $page->getTitle()->invalidateCache();
343
344        return count( $sections );
345    }
346
347    private function saveMetadata(
348        TranslatablePageMarkOperation $operation,
349        TranslatablePageSettings $pageSettings,
350        int $maxId,
351        UserIdentity $user
352    ): void {
353        $page = $operation->getPage();
354        $groupId = $page->getMessageGroupId();
355
356        $this->messageGroupMetadata->set( $groupId, 'maxid', (string)$maxId );
357        if ( $pageSettings->shouldForceLatestSyntaxVersion() || $operation->isFirstMark() ) {
358            $this->messageGroupMetadata->set( $groupId, 'version', self::LATEST_SYNTAX_VERSION );
359        }
360
361        $this->messageGroupMetadata->set(
362            $groupId,
363            'transclusion',
364            $pageSettings->shouldEnableTransclusion() ? '1' : '0'
365        );
366
367        $this->handlePriorityLanguages( $operation->getPage(), $pageSettings, $user );
368    }
369
370    private function handlePriorityLanguages(
371        TranslatablePage $page,
372        TranslatablePageSettings $pageSettings,
373        UserIdentity $user
374    ): void {
375        $languages = implode( ',', $pageSettings->getPriorityLanguages() );
376        $reason = false;
377
378        if ( $languages !== '' ) {
379            $reason = $pageSettings->getPriorityLanguageComment();
380            $force = $pageSettings->shouldForcePriorityLanguage() ? 'on' : 'off';
381        } else {
382            $languages = false;
383            $force = $pageSettings->shouldForcePriorityLanguage() ? 'on' : false;
384            if ( $force === 'on' ) {
385                // We use the reason, if priority force and / or priority languages are set
386                // Otherwise just a reason doesn't make sense
387                $reason = $pageSettings->getPriorityLanguageComment();
388            }
389        }
390
391        $groupId = $page->getMessageGroupId();
392        // old metadata
393        $opLanguages = $this->messageGroupMetadata->get( $groupId, 'prioritylangs' );
394        $opForce = $this->messageGroupMetadata->get( $groupId, 'priorityforce' );
395        $opReason = $this->messageGroupMetadata->get( $groupId, 'priorityreason' );
396
397        $this->messageGroupMetadata->set( $groupId, 'prioritylangs', $languages );
398        $this->messageGroupMetadata->set( $groupId, 'priorityforce', $force );
399        $this->messageGroupMetadata->set( $groupId, 'priorityreason', $reason );
400
401        if ( $opLanguages !== $languages || $opForce !== $force || $opReason !== $reason ) {
402            $logComment = $reason === false ? '' : $reason;
403            $params = [
404                'languages' => $languages,
405                'force' => $force,
406                'reason' => $reason,
407            ];
408
409            $entry = new ManualLogEntry( 'pagetranslation', 'prioritylanguages' );
410            $entry->setPerformer( $user );
411            $entry->setTarget( $page->getTitle() );
412            $entry->setParameters( $params );
413            $entry->setComment( $logComment );
414            $logId = $entry->insert();
415            $entry->publish( $logId );
416        }
417    }
418
419    private function prepareTranslationUnits( TranslatablePage $page, ParserOutput $parserOutput ): array {
420        $highest = (int)$this->messageGroupMetadata->get( $page->getMessageGroupId(), 'maxid' );
421
422        $store = $this->translationUnitStoreFactory->getReader( $page->getPageIdentity() );
423        $storedUnits = $store->getUnits();
424
425        // Prepend the display title unit, which is not part of the page contents
426        $displayTitle = new TranslationUnit(
427            $this->titleFormatter->getPrefixedText( $page->getPageIdentity() ),
428            TranslatablePage::DISPLAY_TITLE_UNIT_ID
429        );
430
431        $units = [ TranslatablePage::DISPLAY_TITLE_UNIT_ID => $displayTitle ] + $parserOutput->units();
432
433        // Figure out the largest used translation unit id
434        foreach ( array_keys( $storedUnits ) as $key ) {
435            $highest = max( $highest, (int)$key );
436        }
437        foreach ( $units as $_ ) {
438            $highest = max( $highest, (int)$_->id );
439        }
440
441        foreach ( $units as $s ) {
442            $s->type = 'old';
443
444            if ( $s->id === TranslationUnit::NEW_UNIT_ID ) {
445                $s->type = 'new';
446                $s->id = (string)( ++$highest );
447            } else {
448                if ( isset( $storedUnits[$s->id] ) ) {
449                    $storedText = $storedUnits[$s->id]->text;
450                    if ( $s->text !== $storedText ) {
451                        $s->type = 'changed';
452                        $s->oldText = $storedText;
453                    }
454                }
455            }
456        }
457
458        // Figure out which units were deleted by removing the still existing units
459        $deletedUnits = $storedUnits;
460        foreach ( $units as $s ) {
461            unset( $deletedUnits[$s->id] );
462        }
463
464        return [ $units, $deletedUnits ];
465    }
466
467    private function updateSectionMarkers(
468        TranslatablePage $page,
469        Authority $authority,
470        TranslatablePageMarkOperation $operation
471    ): ?int {
472        $pageUpdater = $this->wikiPageFactory->newFromTitle( $page->getTitle() )->newPageUpdater( $authority );
473        $content = ContentHandler::makeContent(
474            $operation->getParserOutput()->sourcePageTextForSaving(),
475            $page->getTitle()
476        );
477        $comment = CommentStoreComment::newUnsavedComment(
478            Message::newFromKey( 'tpt-mark-summary' )->inContentLanguage()->text()
479        );
480
481        $pageUpdater->setContent( SlotRecord::MAIN, $content );
482        if ( $authority->authorizeWrite( 'autopatrol', $page->getTitle() ) ) {
483            $pageUpdater->setRcPatrolStatus( RecentChange::PRC_AUTOPATROLLED );
484        }
485        $newRevisionRecord = $pageUpdater->saveRevision( $comment, EDIT_FORCE_BOT | EDIT_UPDATE );
486
487        $status = $pageUpdater->getStatus();
488        if ( !$status->isOK() ) {
489            throw new TranslatablePageMarkException( [ 'tpt-edit-failed', $status->getMessage() ] );
490        }
491
492        return $newRevisionRecord !== null ? $newRevisionRecord->getId() : null;
493    }
494}