Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 43 |
|
0.00% |
0 / 3 |
CRAP | |
0.00% |
0 / 1 |
| SectionContentEvaluator | |
0.00% |
0 / 43 |
|
0.00% |
0 / 3 |
272 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| calculateSectionContent | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
30 | |||
| getSectionBodyWikitext | |
0.00% |
0 / 27 |
|
0.00% |
0 / 1 |
110 | |||
| 1 | <?php |
| 2 | declare( strict_types = 1 ); |
| 3 | |
| 4 | namespace ContentTranslation\Service; |
| 5 | |
| 6 | use ContentTranslation\Exception\HtmlToWikitextConversionException; |
| 7 | use ContentTranslation\Exception\SectionWikitextRetrievalException; |
| 8 | use ContentTranslation\ParsoidClientFactory; |
| 9 | use ContentTranslation\SectionAction; |
| 10 | use Exception; |
| 11 | use MediaWiki\Content\TextContent; |
| 12 | use MediaWiki\Page\WikiPageFactory; |
| 13 | use MediaWiki\Revision\SlotRecord; |
| 14 | use MediaWiki\Title\Title; |
| 15 | use Wikimedia\Rdbms\IDBAccessObject; |
| 16 | |
/**
 * Service that calculates the content that should be added to a page by the 'edit' action.
 *
 * The HTML published by the user is converted to wikitext through the Parsoid client. Depending
 * on the given section action, the current wikitext of an existing section is put in front of it
 * and/or a level-2 heading is put on top of it.
 *
 * @author Nik Gkountas
 * @license GPL-2.0-or-later
 * @since 2025.06
 */
class SectionContentEvaluator {

	/** Placeholder title that selects the lead section, i.e. the text before the first level-2 heading */
	public const LEAD_SECTION_DUMMY_TITLE = '__LEAD_SECTION__';

	public function __construct(
		private readonly WikiPageFactory $wikiPageFactory,
		private readonly ParsoidClientFactory $parsoidClientFactory
	) {
	}

	/**
	 * Builds the wikitext that should be saved for the given section.
	 *
	 * @param string $html HTML to convert to wikitext
	 * @param Title $targetTitle Page that receives the content
	 * @param SectionAction $sectionAction Decides whether existing content is kept and whether a heading is added
	 * @param string $targetSectionTitle Heading text used when the action needs the section title prepended
	 * @param string|null $existingSectionTitle Title of the section to expand; required for expand actions
	 * @return string
	 * @throws HtmlToWikitextConversionException If the HTML cannot be converted to wikitext
	 * @throws SectionWikitextRetrievalException If the existing section content cannot be read
	 */
	public function calculateSectionContent(
		string $html,
		Title $targetTitle,
		SectionAction $sectionAction,
		string $targetSectionTitle,
		?string $existingSectionTitle = null
	): string {
		try {
			$parsoidClient = $this->parsoidClientFactory->createParsoidClient();
			$newSectionWikiText = $parsoidClient->convertHtmlToWikitext(
				$targetTitle,
				$html
			)['body'];
		} catch ( Exception $exception ) {
			// NOTE(review): the original exception is not chained as "previous" because the constructor
			// signature of HtmlToWikitextConversionException is not visible from here - consider adding it.
			throw new HtmlToWikitextConversionException( $exception->getMessage() );
		}

		if ( $sectionAction->isExpandAction() ) {
			// Compare explicitly instead of using "!": a section titled "0" is a valid, non-empty title
			if ( $existingSectionTitle === null || $existingSectionTitle === '' ) {
				throw new SectionWikitextRetrievalException( "Existing section title empty" );
			}
			$existingSectionBodyWikitext = $this->getSectionBodyWikitext( $targetTitle, $existingSectionTitle );

			// Add the wikitext published by the user at the bottom of the existing contents, as a new paragraph
			$newSectionWikiText = "$existingSectionBodyWikitext\n\n$newSectionWikiText";
		}

		if ( $sectionAction->needsSectionTitlePrepending() ) {
			// Put the target section title on top of the section body wikitext, and end the wikitext
			// with a newline, so that the next section title goes into the next line
			$newSectionWikiText = "== $targetSectionTitle ==\n$newSectionWikiText\n";
		}

		return $newSectionWikiText;
	}

	/**
	 * Reads the latest main-slot wikitext of the target page and extracts the body of the given section.
	 *
	 * @param Title $targetTitle
	 * @param string $sectionTitle Level-2 heading text, or self::LEAD_SECTION_DUMMY_TITLE for the lead section
	 * @return string Trimmed, non-empty section wikitext
	 * @throws SectionWikitextRetrievalException
	 */
	private function getSectionBodyWikitext( Title $targetTitle, string $sectionTitle ): string {
		// set "flags" argument to READ_LATEST, so that we always read the latest information from the primary
		// database bypassing caches. This is important in testing environments, when target articles are
		// created on the fly by "SectionTranslationBeforePublishHandler"
		if ( !$targetTitle->exists( IDBAccessObject::READ_LATEST ) ) {
			throw new SectionWikitextRetrievalException( "Target title doesn't exist" );
		}

		$wikiPage = $this->wikiPageFactory->newFromTitle( $targetTitle );
		$lastRevision = $wikiPage->getRevisionRecord();

		if ( !$lastRevision ) {
			throw new SectionWikitextRetrievalException( 'Target revision not found' );
		}

		$content = $lastRevision->getContent( SlotRecord::MAIN );

		if ( !$content instanceof TextContent ) {
			throw new SectionWikitextRetrievalException( 'Cannot retrieve text content' );
		}

		return self::extractSectionBody( $content->getText(), $sectionTitle );
	}

	/**
	 * Extracts a section's body from raw page wikitext using a simple heading-based regex.
	 *
	 * Only level-2 headings (== ... ==) delimit sections; deeper headings are part of the body.
	 *
	 * @param string $pageWikitext
	 * @param string $sectionTitle Level-2 heading text, or self::LEAD_SECTION_DUMMY_TITLE for the lead section
	 * @return string Trimmed, non-empty section wikitext
	 * @throws SectionWikitextRetrievalException If the section is missing or has no content
	 */
	private static function extractSectionBody( string $pageWikitext, string $sectionTitle ): string {
		if ( $sectionTitle === self::LEAD_SECTION_DUMMY_TITLE ) {
			// Everything before the first level-2 heading
			$matchResult = preg_match( '/^(.*?)(?=^==[^=])/ms', $pageWikitext, $matches );

			if ( $matchResult === 1 ) {
				// The page has headings: the lead is only what precedes the first one, even when that is
				// empty. Falling back to the whole page here would return every section as "lead".
				$leadWikitext = trim( $matches[1] );
			} elseif ( $matchResult === 0 ) {
				// If there are no headings at all, entire article is the lead
				$leadWikitext = trim( $pageWikitext );
			} else {
				// preg_match() failed, so nothing reliable can be said about the lead
				$leadWikitext = '';
			}

			if ( $leadWikitext === '' ) {
				throw new SectionWikitextRetrievalException( 'Lead section text not found' );
			}

			return $leadWikitext;
		}

		// The body runs up to the next level-2 heading or to the end of the page. A lookahead is used for
		// the terminator, because in multiline mode "^" never matches at the very end of the subject, so a
		// "^\z" alternative would make the last section of a page unmatchable.
		$pattern = '/^==\s*' . preg_quote( $sectionTitle, '/' ) . '\s*==\s*$(.*?)(?=^==[^=]|\z)/msi';
		$sectionBodyWikitext = preg_match( $pattern, $pageWikitext, $matches ) === 1
			? trim( $matches[1] )
			: '';

		// Strict comparison, so that a body consisting of "0" is not mistaken for an empty one
		if ( $sectionBodyWikitext === '' ) {
			throw new SectionWikitextRetrievalException( 'Section body text not found' );
		}

		return $sectionBodyWikitext;
	}
}
| 91 | throw new SectionWikitextRetrievalException( 'Target revision not found' ); |
| 92 | } |
| 93 | |
| 94 | $content = $lastRevision->getContent( SlotRecord::MAIN ); |
| 95 | |
| 96 | if ( !$content instanceof TextContent ) { |
| 97 | throw new SectionWikitextRetrievalException( 'Cannot retrieve text content' ); |
| 98 | } |
| 99 | |
| 100 | $pageWikitext = $content->getText(); |
| 101 | |
| 102 | // Handle special case for lead section |
| 103 | if ( $sectionTitle === self::LEAD_SECTION_DUMMY_TITLE ) { |
| 104 | // Everything before the first top-level heading (== ... ==) |
| 105 | $pattern = '/^(.*?)(?=^==[^=])/ms'; |
| 106 | if ( preg_match( $pattern, $pageWikitext, $matches ) ) { |
| 107 | $leadWikitext = trim( $matches[1] ); |
| 108 | if ( $leadWikitext !== '' ) { |
| 109 | return $leadWikitext; |
| 110 | } |
| 111 | } |
| 112 | |
| 113 | // If there are no headings at all, entire article is the lead |
| 114 | $trimmed = trim( $pageWikitext ); |
| 115 | if ( $trimmed !== '' ) { |
| 116 | return $trimmed; |
| 117 | } |
| 118 | |
| 119 | throw new SectionWikitextRetrievalException( 'Lead section text not found' ); |
| 120 | } |
| 121 | |
| 122 | $sectionBodyWikitext = null; |
| 123 | |
| 124 | $pattern = '/^==\s*' . preg_quote( $sectionTitle, '/' ) . '\s*==\s*$(.*?)^(==[^=].*?$|\z)/msi'; |
| 125 | if ( preg_match( $pattern, $pageWikitext, $matches ) ) { |
| 126 | $sectionBodyWikitext = trim( $matches[1] ); |
| 127 | } |
| 128 | |
| 129 | if ( !$sectionBodyWikitext ) { |
| 130 | throw new SectionWikitextRetrievalException( 'Section body text not found' ); |
| 131 | } |
| 132 | |
| 133 | return $sectionBodyWikitext; |
| 134 | } |
| 135 | } |