Translate extension for MediaWiki
 
Loading...
Searching...
No Matches
TranslatablePageParser.php
1<?php
2declare( strict_types = 1 );
3
4namespace MediaWiki\Extension\Translate\PageTranslation;
5
7use Wikimedia\Message\MessageValue;
8
/** Source of the unique placeholder strings used while parsing. */
private ParsingPlaceholderFactory $placeholderFactory;

/**
 * @param ParsingPlaceholderFactory $placeholderFactory Factory that is asked for a fresh
 *  placeholder every time a piece of text has to be temporarily swapped out.
 */
public function __construct( ParsingPlaceholderFactory $placeholderFactory ) {
	$this->placeholderFactory = $placeholderFactory;
}
22
/**
 * Tells whether the text has an opening or closing translate tag.
 *
 * Complete <nowiki>...</nowiki> blocks are swapped for placeholders first, so tags
 * inside them are not counted.
 *
 * @param string $text Wikitext to inspect
 * @return bool
 */
public function containsMarkup( string $text ): bool {
	$nowikiHolders = [];
	$armoured = $this->armourNowiki( $nowikiHolders, $text );
	$result = preg_match( '~</?translate[ >]~', $armoured );

	return $result !== 0;
}
28
/**
 * Remove all opening and closing translate tags and translation unit markers from the text.
 *
 * Content of complete <nowiki> blocks is protected with placeholders while the
 * replacements run and is restored before returning.
 *
 * @param string $text Wikitext possibly containing translate tags and unit markers
 * @return string The text without the tags and markers
 */
public function cleanupTags( string $text ): string {
	$nowikiHolders = [];
	$cleaned = $this->armourNowiki( $nowikiHolders, $text );

	// Strip the tags: an opening tag takes one following newline with it,
	// a closing tag takes one preceding newline with it.
	$cleaned = preg_replace(
		[ '~<translate( nowrap)?>\n?~s', '~\n?</translate>~s' ],
		'',
		$cleaned
	);

	// Markers: first the ones trailing a heading line, then all the rest
	$invalidChars = preg_quote( TranslationUnit::UNIT_MARKER_INVALID_CHARS, '~' );
	$cleaned = preg_replace( "~(^=.*=) <!--T:[^{$invalidChars}]+-->$~um", '\1', $cleaned );
	$cleaned = preg_replace( "~<!--T:[^{$invalidChars}]+-->[\n ]?~um", '', $cleaned );

	// Remove variables
	$cleaned = ( new TranslationUnit( $cleaned ) )->getTextForTrans();

	return $this->unarmourNowiki( $nowikiHolders, $cleaned );
}
48
/**
 * Parse the text into a ParserOutput.
 *
 * Every <translate>...</translate> pair is replaced in the text with a placeholder and its
 * content is split into translation units with parseSection(). Complete <nowiki> blocks are
 * armoured for the duration of the tag matching.
 *
 * @param string $text Wikitext of the page
 * @return ParserOutput Built from the remaining text, the placeholder => Section map and
 *  the units collected from all tag pairs
 * @throws ParsingFailure On nested tags, or when an opening or closing tag has no pair
 */
public function parse( string $text ): ParserOutput {
	$nowikiHolders = [];
	$text = $this->armourNowiki( $nowikiHolders, $text );

	$tagPairPattern = '~(<translate(?: nowrap)?>)(.*?)</translate>~s';
	$sections = [];
	$tagPlaceHolders = [];

	for ( ; ; ) {
		$m = [];
		$found = preg_match( $tagPairPattern, $text, $m, PREG_OFFSET_CAPTURE );
		if ( $found !== 1 ) {
			// No (more) tag pairs, or the match failed
			break;
		}

		// Whole match (tags included) and its byte offset within $text
		[ $wholeMatch, $matchOffset ] = $m[0];
		$openTag = $m[1][0];
		$innerContent = $m[2][0];

		// Swap the whole match for a placeholder
		$ph = $this->placeholderFactory->make();
		$text = substr_replace( $text, $ph, $matchOffset, strlen( $wholeMatch ) );

		// The lazy match stops at the first closing tag, so a second opening tag
		// inside the captured content means the tags are nested.
		if ( preg_match( '~<translate( nowrap)?>~', $innerContent ) !== 0 ) {
			throw new ParsingFailure(
				'Nested tags',
				new MessageValue( 'pt-parse-nested', [ wfEscapeWikiText( $innerContent ) ] )
			);
		}

		$canWrap = $openTag !== '<translate nowrap>';

		// Split the content between the tags into units, with nowiki blocks restored
		$innerContent = $this->unarmourNowiki( $nowikiHolders, $innerContent );
		$parsed = $this->parseSection( $innerContent, $canWrap );

		// Record the units and remember what this placeholder stands for
		$sections += $parsed['sections'];
		$tagPlaceHolders[$ph] = new Section( $openTag, $parsed['template'], '</translate>' );
	}

	// Human-readable version of the remaining text for the error messages below
	$prettyTemplate = $text;
	foreach ( array_keys( $tagPlaceHolders ) as $ph ) {
		$prettyTemplate = str_replace( $ph, '[...]', $prettyTemplate );
	}

	// All pairs are gone by now; any tag still present has no counterpart
	if ( preg_match( '~<translate( nowrap)?>~', $text ) !== 0 ) {
		throw new ParsingFailure(
			'Unmatched opening tag',
			// TODO this and the below one should use ->plaintextParams() instead of wfEscapeWikiText,
			// but MediaWiki\EditPage\Constraint\EditFilterMergedContentHookConstraint::formatStatusErrors()
			// calls ->plain() instead of ->parse(), at which point the meaning of plain text params gets lost
			new MessageValue( 'pt-parse-open', [ wfEscapeWikiText( $prettyTemplate ) ] )
		);
	}

	if ( str_contains( $text, '</translate>' ) ) {
		throw new ParsingFailure(
			"Unmatched closing tag",
			new MessageValue( 'pt-parse-close', [ wfEscapeWikiText( $prettyTemplate ) ] )
		);
	}

	$text = $this->unarmourNowiki( $nowikiHolders, $text );

	return new ParserOutput( $text, $tagPlaceHolders, $sections );
}
119
/**
 * Splits the content of one translate tag pair into translation units.
 *
 * The text is split on leading whitespace, trailing whitespace and blank-line separators.
 * Whitespace-only pieces are copied to the template as they are; every other piece becomes
 * a translation unit, represented in the template by a placeholder.
 *
 * @param string $text Content between the tags
 * @param bool $canWrap Passed on to every unit via setCanWrap()
 * @return array Array with keys 'template' (string) and 'sections' (placeholder => TranslationUnit)
 */
public function parseSection( string $text, bool $canWrap ): array {
	$pieces = preg_split(
		'~(^\s*|\s*\n\n\s*|\s*$)~',
		$text,
		-1,
		PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE
	);

	// Content without any newline is treated as inline
	$isInline = !str_contains( $text, "\n" );

	$template = '';
	$sections = [];

	foreach ( $pieces as $piece ) {
		if ( trim( $piece ) === '' ) {
			// Pure whitespace belongs to the template, not to any unit
			$template .= $piece;
			continue;
		}

		$ph = $this->placeholderFactory->make();
		$unit = $this->parseUnit( $piece );
		$unit->setIsInline( $isInline );
		$unit->setCanWrap( $canWrap );

		$sections[$ph] = $unit;
		$template .= $ph;
	}

	return [
		'template' => $template,
		'sections' => $sections,
	];
}
153
/**
 * Builds a TranslationUnit from a piece of content, picking up its marker if it has one.
 *
 * A marker looks like <!--T:id-->. Without a marker the unit gets
 * TranslationUnit::NEW_UNIT_ID as its id. With exactly one marker, the marker is removed
 * from the content and its id is used.
 *
 * @param string $content Content of a single unit, possibly including a marker
 * @return TranslationUnit
 * @throws ParsingFailure If there are several markers, the marker is in an unsupported
 *  position, or nothing but whitespace is left once the marker is removed
 */
public function parseUnit( string $content ): TranslationUnit {
	$markerPattern = '~<!--T:(.*?)-->~';
	$markers = [];
	$markerCount = preg_match_all( $markerPattern, $content, $markers, PREG_SET_ORDER );

	if ( $markerCount > 1 ) {
		throw new ParsingFailure(
			'Multiple translation unit markers',
			new MessageValue( 'pt-shake-multiple', [ wfEscapeWikiText( $content ) ] )
		);
	}

	if ( $markerCount !== 1 ) {
		// If no id given in the source, default to a new section id
		return new TranslationUnit( $content, TranslationUnit::NEW_UNIT_ID );
	}

	// First capture group of the only match
	$id = $markers[0][1];

	// Currently handle only these two standard places.
	// Is this too strict?
	// Normal sections
	$content = preg_replace( '~^<!--T:(.*?)-->( |\n)~', '', $content );
	// Sections with title
	$content = preg_replace( '~\s*<!--T:(.*?)-->$~m', '', $content );

	// A marker that survived both removals is somewhere it is not supported
	if ( preg_match( $markerPattern, $content ) === 1 ) {
		throw new ParsingFailure(
			'Translation unit marker is in unsupported position',
			new MessageValue( 'pt-shake-position', [ wfEscapeWikiText( $content ) ] )
		);
	}

	if ( trim( $content ) === '' ) {
		throw new ParsingFailure(
			'Translation unit has no content besides marker',
			new MessageValue( 'pt-shake-empty', [ wfEscapeWikiText( $id ) ] )
		);
	}

	return new TranslationUnit( $content, $id );
}
201
/**
 * Replaces every complete <nowiki>...</nowiki> block in the text with a placeholder.
 *
 * @param array &$holders Receives placeholder => original block (tags included) entries,
 *  in the form unarmourNowiki() expects
 * @param string $text
 * @return string The text with the blocks swapped for placeholders
 */
public function armourNowiki( array &$holders, string $text ): string {
	$nowikiPattern = '~(<nowiki>)(.*?)(</nowiki>)~s';
	$found = [];

	while ( preg_match( $nowikiPattern, $text, $found ) ) {
		$block = $found[0];
		$placeholder = $this->placeholderFactory->make();

		// str_replace swaps every identical copy of the block, so they all share this placeholder
		$holders[$placeholder] = $block;
		$text = str_replace( $block, $placeholder, $text );
	}

	return $text;
}
214
/**
 * Puts back the blocks that armourNowiki() replaced with placeholders.
 *
 * @param array $holders Map of placeholder => original text
 * @param string $text
 * @return string The text with every placeholder key replaced by its value
 */
public function unarmourNowiki( array $holders, string $text ): string {
	$restored = strtr( $text, $holders );

	return $restored;
}
219}
// Service wiring table: maps each 'Translate:*' service name to a static factory closure that
// constructs the service, taking its dependencies from the MediaWikiServices container (a few
// factories take no arguments at all). Services depend on each other through
// $services->get( 'Translate:...' ).
// NOTE(review): this minified fragment has no terminating semicolon and sits after the end of the
// class; it looks like cross-reference residue from another file — confirm before relying on it.
return[ 'Translate:AggregateGroupManager'=> static function(MediaWikiServices $services):AggregateGroupManager { return new AggregateGroupManager($services->getTitleFactory(), $services->get( 'Translate:MessageGroupMetadata'));}, 'Translate:AggregateGroupMessageGroupFactory'=> static function(MediaWikiServices $services):AggregateGroupMessageGroupFactory { return new AggregateGroupMessageGroupFactory($services->get( 'Translate:MessageGroupMetadata'));}, 'Translate:ConfigHelper'=> static function():ConfigHelper { return new ConfigHelper();}, 'Translate:CsvTranslationImporter'=> static function(MediaWikiServices $services):CsvTranslationImporter { return new CsvTranslationImporter( $services->getWikiPageFactory());}, 'Translate:EntitySearch'=> static function(MediaWikiServices $services):EntitySearch { return new EntitySearch($services->getMainWANObjectCache(), $services->getCollationFactory() ->makeCollation( 'uca-default-u-kn'), MessageGroups::singleton(), $services->getNamespaceInfo(), $services->get( 'Translate:MessageIndex'), $services->getTitleParser(), $services->getTitleFormatter());}, 'Translate:ExternalMessageSourceStateComparator'=> static function(MediaWikiServices $services):ExternalMessageSourceStateComparator { return new ExternalMessageSourceStateComparator(new SimpleStringComparator(), $services->getRevisionLookup(), $services->getPageStore());}, 'Translate:ExternalMessageSourceStateImporter'=> static function(MediaWikiServices $services):ExternalMessageSourceStateImporter { return new ExternalMessageSourceStateImporter($services->get( 'Translate:GroupSynchronizationCache'), $services->getJobQueueGroup(), LoggerFactory::getInstance(LogNames::GROUP_SYNCHRONIZATION), $services->get( 'Translate:MessageIndex'), $services->getTitleFactory(), $services->get( 'Translate:MessageGroupSubscription'), new ServiceOptions(ExternalMessageSourceStateImporter::CONSTRUCTOR_OPTIONS, $services->getMainConfig()));}, 'Translate:FileBasedMessageGroupFactory'=> static 
function(MediaWikiServices $services):FileBasedMessageGroupFactory { return new FileBasedMessageGroupFactory(new MessageGroupConfigurationParser(), new ServiceOptions(FileBasedMessageGroupFactory::SERVICE_OPTIONS, $services->getMainConfig()),);}, 'Translate:FileFormatFactory'=> static function(MediaWikiServices $services):FileFormatFactory { return new FileFormatFactory( $services->getObjectFactory());}, 'Translate:GroupSynchronizationCache'=> static function(MediaWikiServices $services):GroupSynchronizationCache { return new GroupSynchronizationCache( $services->get( 'Translate:PersistentCache'));}, 'Translate:HookDefinedMessageGroupFactory'=> static function(MediaWikiServices $services):HookDefinedMessageGroupFactory { return new HookDefinedMessageGroupFactory( $services->get( 'Translate:HookRunner'));}, 'Translate:HookRunner'=> static function(MediaWikiServices $services):HookRunner { return new HookRunner( $services->getHookContainer());}, 'Translate:MessageBundleDependencyPurger'=> static function(MediaWikiServices $services):MessageBundleDependencyPurger { return new MessageBundleDependencyPurger( $services->get( 'Translate:TranslatableBundleFactory'));}, 'Translate:MessageBundleMessageGroupFactory'=> static function(MediaWikiServices $services):MessageBundleMessageGroupFactory { return new MessageBundleMessageGroupFactory($services->get( 'Translate:MessageGroupMetadata'), new ServiceOptions(MessageBundleMessageGroupFactory::SERVICE_OPTIONS, $services->getMainConfig()),);}, 'Translate:MessageBundleStore'=> static function(MediaWikiServices $services):MessageBundleStore { return new MessageBundleStore($services->get( 'Translate:RevTagStore'), $services->getJobQueueGroup(), $services->getLanguageNameUtils(), $services->get( 'Translate:MessageIndex'), $services->get( 'Translate:MessageGroupMetadata'));}, 'Translate:MessageBundleTranslationLoader'=> static function(MediaWikiServices $services):MessageBundleTranslationLoader { return new 
MessageBundleTranslationLoader( $services->getLanguageFallback());}, 'Translate:MessageGroupMetadata'=> static function(MediaWikiServices $services):MessageGroupMetadata { return new MessageGroupMetadata( $services->getConnectionProvider());}, 'Translate:MessageGroupReviewStore'=> static function(MediaWikiServices $services):MessageGroupReviewStore { return new MessageGroupReviewStore($services->getConnectionProvider(), $services->get( 'Translate:HookRunner'));}, 'Translate:MessageGroupStatsTableFactory'=> static function(MediaWikiServices $services):MessageGroupStatsTableFactory { return new MessageGroupStatsTableFactory($services->get( 'Translate:ProgressStatsTableFactory'), $services->getLinkRenderer(), $services->get( 'Translate:MessageGroupReviewStore'), $services->get( 'Translate:MessageGroupMetadata'), $services->getMainConfig() ->get( 'TranslateWorkflowStates') !==false);}, 'Translate:MessageGroupSubscription'=> static function(MediaWikiServices $services):MessageGroupSubscription { return new MessageGroupSubscription($services->get( 'Translate:MessageGroupSubscriptionStore'), $services->getJobQueueGroup(), $services->getUserIdentityLookup(), LoggerFactory::getInstance(LogNames::GROUP_SUBSCRIPTION), new ServiceOptions(MessageGroupSubscription::CONSTRUCTOR_OPTIONS, $services->getMainConfig()));}, 'Translate:MessageGroupSubscriptionHookHandler'=> static function(MediaWikiServices $services):MessageGroupSubscriptionHookHandler { return new MessageGroupSubscriptionHookHandler($services->get( 'Translate:MessageGroupSubscription'), $services->getUserFactory());}, 'Translate:MessageGroupSubscriptionStore'=> static function(MediaWikiServices $services):MessageGroupSubscriptionStore { return new MessageGroupSubscriptionStore( $services->getConnectionProvider());}, 'Translate:MessageIndex'=> static function(MediaWikiServices $services):MessageIndex { $params=(array) $services->getMainConfig() ->get( 'TranslateMessageIndex');$class=array_shift( 
$params);$implementationMap=['HashMessageIndex'=> HashMessageIndex::class, 'CDBMessageIndex'=> CDBMessageIndex::class, 'DatabaseMessageIndex'=> DatabaseMessageIndex::class, 'hash'=> HashMessageIndex::class, 'cdb'=> CDBMessageIndex::class, 'database'=> DatabaseMessageIndex::class,];$messageIndexStoreClass=$implementationMap[$class] ?? $implementationMap['database'];return new MessageIndex(new $messageIndexStoreClass, $services->getMainWANObjectCache(), $services->getJobQueueGroup(), $services->get( 'Translate:HookRunner'), LoggerFactory::getInstance(LogNames::MAIN), $services->getMainObjectStash(), $services->getConnectionProvider(), new ServiceOptions(MessageIndex::SERVICE_OPTIONS, $services->getMainConfig()),);}, 'Translate:MessagePrefixStats'=> static function(MediaWikiServices $services):MessagePrefixStats { return new MessagePrefixStats( $services->getTitleParser());}, 'Translate:ParsingPlaceholderFactory'=> static function():ParsingPlaceholderFactory { return new ParsingPlaceholderFactory();}, 'Translate:PersistentCache'=> static function(MediaWikiServices $services):PersistentCache { return new PersistentDatabaseCache($services->getConnectionProvider(), $services->getJsonCodec());}, 'Translate:ProgressStatsTableFactory'=> static function(MediaWikiServices $services):ProgressStatsTableFactory { return new ProgressStatsTableFactory($services->getLinkRenderer(), $services->get( 'Translate:ConfigHelper'), $services->get( 'Translate:MessageGroupMetadata'));}, 'Translate:RevTagStore'=> static function(MediaWikiServices $services):RevTagStore { return new RevTagStore( $services->getConnectionProvider());}, 'Translate:SubpageListBuilder'=> static function(MediaWikiServices $services):SubpageListBuilder { return new SubpageListBuilder($services->get( 'Translate:TranslatableBundleFactory'), $services->getLinkBatchFactory());}, 'Translate:TranslatableBundleDeleter'=> static function(MediaWikiServices $services):TranslatableBundleDeleter { return new 
TranslatableBundleDeleter($services->getMainObjectStash(), $services->getJobQueueGroup(), $services->get( 'Translate:SubpageListBuilder'), $services->get( 'Translate:TranslatableBundleFactory'));}, 'Translate:TranslatableBundleExporter'=> static function(MediaWikiServices $services):TranslatableBundleExporter { return new TranslatableBundleExporter($services->get( 'Translate:SubpageListBuilder'), $services->getWikiExporterFactory(), $services->getConnectionProvider());}, 'Translate:TranslatableBundleFactory'=> static function(MediaWikiServices $services):TranslatableBundleFactory { return new TranslatableBundleFactory($services->get( 'Translate:TranslatablePageStore'), $services->get( 'Translate:MessageBundleStore'));}, 'Translate:TranslatableBundleImporter'=> static function(MediaWikiServices $services):TranslatableBundleImporter { return new TranslatableBundleImporter($services->getWikiImporterFactory(), $services->get( 'Translate:TranslatablePageParser'), $services->getRevisionLookup(), $services->getNamespaceInfo(), $services->getTitleFactory(), $services->getFormatterFactory());}, 'Translate:TranslatableBundleMover'=> static function(MediaWikiServices $services):TranslatableBundleMover { return new TranslatableBundleMover($services->getMovePageFactory(), $services->getJobQueueGroup(), $services->getLinkBatchFactory(), $services->get( 'Translate:TranslatableBundleFactory'), $services->get( 'Translate:SubpageListBuilder'), $services->getConnectionProvider(), $services->getObjectCacheFactory(), $services->getMainConfig() ->get( 'TranslatePageMoveLimit'));}, 'Translate:TranslatableBundleStatusStore'=> static function(MediaWikiServices $services):TranslatableBundleStatusStore { return new TranslatableBundleStatusStore($services->getConnectionProvider() ->getPrimaryDatabase(), $services->getCollationFactory() ->makeCollation( 'uca-default-u-kn'), $services->getDBLoadBalancer() ->getMaintenanceConnectionRef(DB_PRIMARY));}, 'Translate:TranslatablePageMarker'=> static 
function(MediaWikiServices $services):TranslatablePageMarker { return new TranslatablePageMarker($services->getConnectionProvider(), $services->getJobQueueGroup(), $services->getLinkRenderer(), MessageGroups::singleton(), $services->get( 'Translate:MessageIndex'), $services->getTitleFormatter(), $services->getTitleParser(), $services->get( 'Translate:TranslatablePageParser'), $services->get( 'Translate:TranslatablePageStore'), $services->get( 'Translate:TranslatablePageStateStore'), $services->get( 'Translate:TranslationUnitStoreFactory'), $services->get( 'Translate:MessageGroupMetadata'), $services->getWikiPageFactory(), $services->get( 'Translate:TranslatablePageView'), $services->get( 'Translate:MessageGroupSubscription'), $services->getFormatterFactory());}, 'Translate:TranslatablePageMessageGroupFactory'=> static function(MediaWikiServices $services):TranslatablePageMessageGroupFactory { return new TranslatablePageMessageGroupFactory(new ServiceOptions(TranslatablePageMessageGroupFactory::SERVICE_OPTIONS, $services->getMainConfig()),);}, 'Translate:TranslatablePageParser'=> static function(MediaWikiServices $services):TranslatablePageParser { return new TranslatablePageParser($services->get( 'Translate:ParsingPlaceholderFactory'));}, 'Translate:TranslatablePageStateStore'=> static function(MediaWikiServices $services):TranslatablePageStateStore { return new TranslatablePageStateStore($services->get( 'Translate:PersistentCache'), $services->getPageStore());}, 'Translate:TranslatablePageStore'=> static function(MediaWikiServices $services):TranslatablePageStore { return new TranslatablePageStore($services->get( 'Translate:MessageIndex'), $services->getJobQueueGroup(), $services->get( 'Translate:RevTagStore'), $services->getConnectionProvider(), $services->get( 'Translate:TranslatableBundleStatusStore'), $services->get( 'Translate:TranslatablePageParser'), $services->get( 'Translate:MessageGroupMetadata'));}, 'Translate:TranslatablePageView'=> static 
function(MediaWikiServices $services):TranslatablePageView { return new TranslatablePageView($services->getConnectionProvider(), $services->get( 'Translate:TranslatablePageStateStore'), new ServiceOptions(TranslatablePageView::SERVICE_OPTIONS, $services->getMainConfig()));}, 'Translate:TranslateSandbox'=> static function(MediaWikiServices $services):TranslateSandbox { return new TranslateSandbox($services->getUserFactory(), $services->getConnectionProvider(), $services->getPermissionManager(), $services->getAuthManager(), $services->getUserGroupManager(), $services->getActorStore(), $services->getUserOptionsManager(), $services->getJobQueueGroup(), $services->get( 'Translate:HookRunner'), new ServiceOptions(TranslateSandbox::CONSTRUCTOR_OPTIONS, $services->getMainConfig()));}, 'Translate:TranslationStashReader'=> static function(MediaWikiServices $services):TranslationStashReader { return new TranslationStashStorage( $services->getConnectionProvider() ->getPrimaryDatabase());}, 'Translate:TranslationStatsDataProvider'=> static function(MediaWikiServices $services):TranslationStatsDataProvider { return new TranslationStatsDataProvider(new ServiceOptions(TranslationStatsDataProvider::CONSTRUCTOR_OPTIONS, $services->getMainConfig()), $services->getObjectFactory(), $services->getConnectionProvider());}, 'Translate:TranslationUnitStoreFactory'=> static function(MediaWikiServices $services):TranslationUnitStoreFactory { return new TranslationUnitStoreFactory( $services->getDBLoadBalancer());}, 'Translate:TranslatorActivity'=> static function(MediaWikiServices $services):TranslatorActivity { $query=new TranslatorActivityQuery($services->getMainConfig(), $services->getDBLoadBalancer());return new TranslatorActivity($services->getMainObjectStash(), $query, $services->getJobQueueGroup());}, 'Translate:TtmServerFactory'=> static function(MediaWikiServices $services):TtmServerFactory { $config=$services->getMainConfig();$default=$config->get( 
'TranslateTranslationDefaultService');if( $default===false) { $default=null;} return new TtmServerFactory( $config->get( 'TranslateTranslationServices'), $default);}]
@phpcs-require-sorted-array
Represents a parsing output produced by TranslatablePageParser.
Represents any kind of failure to parse a translatable page source code.
Generates ParserOutput from text or removes all tags from a text.
cleanupTags(string $text)
Remove all opening and closing translate tags following the same whitespace rules as the regular parsing.
parseUnit(string $content)
Checks if this unit already contains a section marker.
parseSection(string $text, bool $canWrap)
Splits the content marked with <translate> tags into translation units, which are separated with two or more newlines.
This class represents one translation unit in a translatable page.
Create unique placeholders that can be used when parsing (wiki)text.