MediaWiki master
Parser.php
Go to the documentation of this file.
1<?php
24namespace MediaWiki\Parser;
25
26use BadMethodCallException;
27use Exception;
28use File;
29use HtmlArmor;
32use InvalidArgumentException;
33use LogicException;
34use MapCacheLRU;
35use MediaHandler;
41use MediaWiki\Debug\DeprecationHelper;
85use Psr\Log\LoggerInterface;
86use RuntimeException;
88use StringUtils;
89use UnexpectedValueException;
90use Wikimedia\Bcp47Code\Bcp47CodeValue;
91use Wikimedia\IPUtils;
95use Wikimedia\Parsoid\Core\LinkTarget;
96use Wikimedia\Parsoid\Core\SectionMetadata;
97use Wikimedia\Parsoid\Core\TOCData;
98use Wikimedia\Parsoid\DOM\Comment;
99use Wikimedia\Parsoid\DOM\DocumentFragment;
100use Wikimedia\Parsoid\DOM\Element;
101use Wikimedia\Parsoid\DOM\Node;
102use Wikimedia\Parsoid\Utils\DOMCompat;
103use Wikimedia\Parsoid\Utils\DOMUtils;
104use Wikimedia\RemexHtml\Serializer\SerializerNode;
105use Wikimedia\ScopedCallback;
106
147#[\AllowDynamicProperties]
148class Parser {
149 use DeprecationHelper;
150
151 # Flags for Parser::setFunctionHook
152 public const SFH_NO_HASH = 1;
153 public const SFH_OBJECT_ARGS = 2;
154
155 # Constants needed for external link processing
163 public const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
168 // phpcs:ignore Generic.Files.LineLength
169 private const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
171 // phpcs:ignore Generic.Files.LineLength
172 private const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
173 \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)avif|gif|jpg|jpeg|png|svg|webp)$/Sxu';
174
176 private const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
177
182 public const PTD_FOR_INCLUSION = Preprocessor::DOM_FOR_INCLUSION;
183
184 # Allowed values for $this->mOutputType
186 public const OT_HTML = 1;
188 public const OT_WIKI = 2;
190 public const OT_PREPROCESS = 3;
195 public const OT_PLAIN = 4;
196
214 public const MARKER_SUFFIX = "-QINU`\"'\x7f";
215 public const MARKER_PREFIX = "\x7f'\"`UNIQ-";
216 private const HEADLINE_MARKER_REGEX = '/^' . self::MARKER_PREFIX . '-h-(\d+)-' . self::MARKER_SUFFIX . '/';
217
232 public const TOC_PLACEHOLDER = '<meta property="mw:PageProp/toc" />';
233
234 # Persistent:
236 private array $mTagHooks = [];
238 private array $mFunctionHooks = [];
240 private array $mFunctionSynonyms = [ 0 => [], 1 => [] ];
242 private array $mStripList = [];
244 private array $mVarCache = [];
246 private array $mImageParams = [];
248 private array $mImageParamsMagicArray = [];
250 public $mMarkerIndex = 0;
251
252 // Initialised by initializeVariables()
254 private MagicWordArray $mVariables;
255 private MagicWordArray $mSubstWords;
256
257 // Initialised in constructor
259 private string $mExtLinkBracketedRegex;
260 private UrlUtils $urlUtils;
261 private Preprocessor $mPreprocessor;
262
263 // Cleared with clearState():
265 private ParserOutput $mOutput;
266 private int $mAutonumber = 0;
267 private StripState $mStripState;
268 private LinkHolderArray $mLinkHolders;
269 private int $mLinkID = 0;
270 private array $mIncludeSizes;
281 private array $mTplRedirCache;
283 public array $mHeadings;
285 private array $mDoubleUnderscores;
291 private bool $mShowToc;
292 private bool $mForceTocPosition;
293 private array $mTplDomCache;
294 private ?UserIdentity $mUser;
295
296 # Temporary
297 # These are variables reset at least once per parse regardless of $clearState
298
303 private $mOptions;
304
305 # Deprecated "dynamic" properties
306 # These used to be dynamic properties added to the parser, but these
307 # have been deprecated since 1.42.
311 public $extCite;
328
334 private Title $mTitle;
336 private int $mOutputType;
338 private bool $mStripExtTags = true;
343 private array $ot;
345 private ?int $mRevisionId = null;
347 private ?string $mRevisionTimestamp = null;
349 private ?string $mRevisionUser = null;
351 private ?int $mRevisionSize = null;
353 private $mInputSize = false;
354
355 private ?RevisionRecord $mRevisionRecordObject = null;
356
362 private ?MapCacheLRU $currentRevisionCache = null;
363
368 private $mInParse = false;
369
370 private SectionProfiler $mProfiler;
371 private ?LinkRenderer $mLinkRenderer = null;
372
373 private MagicWordFactory $magicWordFactory;
374 private Language $contLang;
375 private LanguageConverterFactory $languageConverterFactory;
376 private LanguageNameUtils $languageNameUtils;
377 private ParserFactory $factory;
378 private SpecialPageFactory $specialPageFactory;
379 private TitleFormatter $titleFormatter;
385 private ServiceOptions $svcOptions;
386 private LinkRendererFactory $linkRendererFactory;
387 private NamespaceInfo $nsInfo;
388 private LoggerInterface $logger;
389 private BadFileLookup $badFileLookup;
390 private HookContainer $hookContainer;
391 private HookRunner $hookRunner;
392 private TidyDriverBase $tidy;
393 private WANObjectCache $wanCache;
394 private UserOptionsLookup $userOptionsLookup;
395 private UserFactory $userFactory;
396 private HttpRequestFactory $httpRequestFactory;
397 private TrackingCategories $trackingCategories;
398 private SignatureValidatorFactory $signatureValidatorFactory;
399 private UserNameUtils $userNameUtils;
400
404 public const CONSTRUCTOR_OPTIONS = [
405 // See documentation for the corresponding config options
406 // Many of these are only used in (eg) CoreMagicVariables
431 ];
432
/**
 * Build a parser from its injected services.
 *
 * Parsers are meant to be created by a ParserFactory; constructing one
 * directly throws. The constructor stores every collaborator, compiles the
 * bracketed external-link regex, creates the preprocessor, registers the
 * core parser functions and tag hooks, initialises the magic variables and
 * finally fires the ParserFirstCallInit hook.
 *
 * NOTE(review): the source this block was recovered from had lost the guard
 * condition and the CoreTagHooks registration (leaving unbalanced braces and
 * a stray argument list). Both were restored to match upstream MediaWiki;
 * confirm against the canonical file.
 *
 * @param ServiceOptions $svcOptions Must provide self::CONSTRUCTOR_OPTIONS
 * @param MagicWordFactory $magicWordFactory
 * @param Language $contLang Content language
 * @param ParserFactory $factory
 * @param UrlUtils $urlUtils
 * @param SpecialPageFactory $spFactory
 * @param LinkRendererFactory $linkRendererFactory
 * @param NamespaceInfo $nsInfo
 * @param LoggerInterface $logger
 * @param BadFileLookup $badFileLookup
 * @param LanguageConverterFactory $languageConverterFactory
 * @param LanguageNameUtils $languageNameUtils
 * @param HookContainer $hookContainer
 * @param TidyDriverBase $tidy
 * @param WANObjectCache $wanCache
 * @param UserOptionsLookup $userOptionsLookup
 * @param UserFactory $userFactory
 * @param TitleFormatter $titleFormatter
 * @param HttpRequestFactory $httpRequestFactory
 * @param TrackingCategories $trackingCategories
 * @param SignatureValidatorFactory $signatureValidatorFactory
 * @param UserNameUtils $userNameUtils
 * @throws BadMethodCallException When constructed outside a ParserFactory
 */
public function __construct(
	ServiceOptions $svcOptions,
	MagicWordFactory $magicWordFactory,
	Language $contLang,
	ParserFactory $factory,
	UrlUtils $urlUtils,
	SpecialPageFactory $spFactory,
	LinkRendererFactory $linkRendererFactory,
	NamespaceInfo $nsInfo,
	LoggerInterface $logger,
	BadFileLookup $badFileLookup,
	LanguageConverterFactory $languageConverterFactory,
	LanguageNameUtils $languageNameUtils,
	HookContainer $hookContainer,
	TidyDriverBase $tidy,
	WANObjectCache $wanCache,
	UserOptionsLookup $userOptionsLookup,
	UserFactory $userFactory,
	TitleFormatter $titleFormatter,
	HttpRequestFactory $httpRequestFactory,
	TrackingCategories $trackingCategories,
	SignatureValidatorFactory $signatureValidatorFactory,
	UserNameUtils $userNameUtils
) {
	$this->deprecateDynamicPropertiesAccess( '1.42', __CLASS__ );
	$this->deprecatePublicProperty( 'ot', '1.35', __CLASS__ );
	$this->deprecatePublicProperty( 'mTitle', '1.35', __CLASS__ );
	$this->deprecatePublicProperty( 'mOptions', '1.35', __CLASS__ );

	// NOTE(review): restored guard condition -- verify against upstream.
	if ( ParserFactory::$inParserFactory === 0 ) {
		// Direct construction of Parser was deprecated in 1.34 and
		// removed in 1.36; use a ParserFactory instead.
		throw new BadMethodCallException( 'Direct construction of Parser not allowed' );
	}
	$svcOptions->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
	$this->svcOptions = $svcOptions;

	$this->urlUtils = $urlUtils;
	// Matches "[protocol+address optional-label]"; built once because the
	// protocol list comes from the injected UrlUtils.
	$this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->urlUtils->validProtocols() . ')' .
		self::EXT_LINK_ADDR .
		self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*)\]/Su';

	$this->magicWordFactory = $magicWordFactory;

	$this->contLang = $contLang;

	$this->factory = $factory;
	$this->specialPageFactory = $spFactory;
	$this->linkRendererFactory = $linkRendererFactory;
	$this->nsInfo = $nsInfo;
	$this->logger = $logger;
	$this->badFileLookup = $badFileLookup;

	$this->languageConverterFactory = $languageConverterFactory;
	$this->languageNameUtils = $languageNameUtils;

	$this->hookContainer = $hookContainer;
	$this->hookRunner = new HookRunner( $hookContainer );

	$this->tidy = $tidy;

	$this->wanCache = $wanCache;
	$this->mPreprocessor = new Preprocessor_Hash(
		$this,
		$this->wanCache,
		[
			'cacheThreshold' => $svcOptions->get( MainConfigNames::PreprocessorCacheThreshold ),
			'disableLangConversion' => $languageConverterFactory->isConversionDisabled(),
		]
	);

	$this->userOptionsLookup = $userOptionsLookup;
	$this->userFactory = $userFactory;
	$this->titleFormatter = $titleFormatter;
	$this->httpRequestFactory = $httpRequestFactory;
	$this->trackingCategories = $trackingCategories;
	$this->signatureValidatorFactory = $signatureValidatorFactory;
	$this->userNameUtils = $userNameUtils;

	// These steps used to be done in "::firstCallInit()"
	// (if you're chasing a reference from some old code)
	CoreParserFunctions::register(
		$this,
		new ServiceOptions( CoreParserFunctions::REGISTER_OPTIONS, $svcOptions )
	);
	// NOTE(review): restored call -- verify against upstream.
	CoreTagHooks::register(
		$this,
		new ServiceOptions( CoreTagHooks::REGISTER_OPTIONS, $svcOptions )
	);
	$this->initializeVariables();

	$this->hookRunner->onParserFirstCallInit( $this );
	// Placeholder title until setPage()/setTitle() is called; getPage()
	// recognises 'Badtitle/Missing' and reports it as "no page set".
	$this->mTitle = Title::makeTitle( NS_SPECIAL, 'Badtitle/Missing' );
}
554
/**
 * Release everything the parser holds when it is destroyed: the link
 * holder array first (if it was ever initialised), then every remaining
 * property.
 */
public function __destruct() {
	// @phan-suppress-next-line PhanRedundantCondition Typed property not set in constructor, may be uninitialized
	if ( isset( $this->mLinkHolders ) ) {
		// @phan-suppress-next-line PhanTypeObjectUnsetDeclaredProperty
		unset( $this->mLinkHolders );
	}
	// @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach
	foreach ( $this as $propertyName => $unused ) {
		unset( $this->$propertyName );
	}
}
569
/**
 * Fix up a freshly cloned parser: the copy is not mid-parse, it gets its
 * own preprocessor bound to itself, and the ParserCloned hook is fired.
 */
public function __clone() {
	$this->mInParse = false;

	// The preprocessor keeps a back-reference to its parser, so the clone
	// needs a private copy that points at the clone.
	$ownPreprocessor = clone $this->mPreprocessor;
	$this->mPreprocessor = $ownPreprocessor;
	$ownPreprocessor->resetParser( $this );

	$this->hookRunner->onParserCloned( $this );
}
581
/**
 * No-op kept for backwards compatibility.
 *
 * The work this method used to do now happens in the constructor.
 */
public function firstCallInit() {
	// Intentionally empty. This method should be hard-deprecated once the
	// remaining callers are removed.
}
595
/**
 * Reset all per-parse state: a fresh ParserOutput, link holders, strip
 * state and profiler, zeroed counters and emptied caches. Fires the
 * ParserClearState hook once everything has been reset.
 */
public function clearState() {
	$this->resetOutput();
	$this->mAutonumber = 0;
	$this->mLinkHolders = new LinkHolderArray(
		$this,
		$this->getContentLanguageConverter(),
		$this->getHookContainer()
	);
	$this->mLinkID = 0;

	// Forget everything known about the revision being parsed.
	$this->mRevisionTimestamp = null;
	$this->mRevisionId = null;
	$this->mRevisionUser = null;
	$this->mRevisionSize = null;
	$this->mRevisionRecordObject = null;

	$this->mVarCache = [];
	$this->mUser = null;
	$this->currentRevisionCache = null;

	$this->mStripState = new StripState( $this );

	# Clear these on every parse, T6549
	$this->mTplRedirCache = [];
	$this->mTplDomCache = [];

	$this->mShowToc = true;
	$this->mForceTocPosition = false;

	// Limit-report counters.
	$this->mIncludeSizes = [ 'post-expand' => 0, 'arg' => 0 ];
	$this->mPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;

	$this->mHeadings = [];
	$this->mDoubleUnderscores = [];
	$this->mExpensiveFunctionCount = 0;

	$this->mProfiler = new SectionProfiler();

	$this->hookRunner->onParserClearState( $this );
}
641
/**
 * Replace the current ParserOutput with a new, empty one and register its
 * recordOption() method as a watcher on the current parser options.
 */
public function resetOutput() {
	$freshOutput = new ParserOutput();
	$this->mOutput = $freshOutput;
	$this->mOptions->registerWatcher( [ $freshOutput, 'recordOption' ] );
}
650
/**
 * Convert wikitext to HTML and return the populated ParserOutput.
 *
 * @param string $text Wikitext to parse
 * @param PageReference $page Page the text is parsed for
 * @param ParserOptions $options
 * @param bool $linestart Forwarded to the half-parsed stage, which hands
 *   it to the block-level pass
 * @param bool $clearState When true the parser is locked for the duration
 *   of the call and U+007F characters in the input are replaced by "?";
 *   the flag is also forwarded to startParse()
 * @param int|null $revid When not null, overrides the revision ID (and
 *   clears the cached revision record, timestamp, user and size) for the
 *   duration of this call; the previous values are restored before
 *   returning
 * @return ParserOutput
 */
public function parse(
	$text, PageReference $page, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	if ( $clearState ) {
		// We use U+007F DELETE to construct strip markers, so we have to make
		// sure that this character does not occur in the input text.
		$text = strtr( $text, "\x7f", "?" );
		// Held only for its scope: the lock is tied to this variable's lifetime.
		$magicScopeVariable = $this->lock();
	}
	// Strip U+0000 NULL (T159174)
	$text = str_replace( "\000", '', $text );

	$this->startParse( $page, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	$this->mOutput->resetParseStartTime();

	// Remember the revision state so it can be restored before returning.
	$oldRevisionId = $this->mRevisionId;
	$oldRevisionRecordObject = $this->mRevisionRecordObject;
	$oldRevisionTimestamp = $this->mRevisionTimestamp;
	$oldRevisionUser = $this->mRevisionUser;
	$oldRevisionSize = $this->mRevisionSize;
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
		$this->mRevisionRecordObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	$text = $this->internalParse( $text );
	$this->hookRunner->onParserAfterParse( $this, $text, $this->mStripState );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	// Derive a title text from the language converter, unless conversion is
	// disabled, suppressed by a behaviour switch, or a display title has
	// already been set on the output.
	if ( !$options->getDisableTitleConversion()
		&& !isset( $this->mDoubleUnderscores['nocontentconvert'] )
		&& !isset( $this->mDoubleUnderscores['notitleconvert'] )
		&& $this->mOutput->getDisplayTitle() === false
	) {
		$titleText = $this->getTargetLanguageConverter()->getConvRuleTitle();
		if ( $titleText !== false ) {
			$titleText = Sanitizer::removeSomeTags( $titleText );
		} else {
			[ $nsText, $nsSeparator, $mainText ] = $this->getTargetLanguageConverter()->convertSplitTitle( $page );
			// In the future, those three pieces could be stored separately rather than joined into $titleText,
			// and OutputPage would format them and join them together, to resolve T314399.
			$titleText = self::formatPageTitle( $nsText, $nsSeparator, $mainText );
		}
		$this->mOutput->setTitleText( $titleText );
	}

	# Recording timing info. Must be called before finalizeAdaptiveCacheExpiry() and
	# makeLimitReport(), which make use of the timing info.
	$this->mOutput->recordTimeProfile();

	# Compute runtime adaptive expiry if set
	$this->mOutput->finalizeAdaptiveCacheExpiry();

	# Warn if too many heavyweight parser functions were used
	$expensiveLimit = $options->getExpensiveParserFunctionLimit();
	if ( $this->mExpensiveFunctionCount > $expensiveLimit ) {
		// Pass the limit as well as the count so the warning can report both.
		$this->limitationWarn( 'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$expensiveLimit
		);
	}

	# Information on limits, for the benefit of users who try to skirt them
	if ( $this->svcOptions->get( MainConfigNames::EnableParserLimitReporting ) ) {
		$this->makeLimitReport( $this->mOptions, $this->mOutput );
	}

	$this->mOutput->setFromParserOptions( $options );

	$this->mOutput->setRawText( $text );

	// Restore the revision state saved above.
	$this->mRevisionId = $oldRevisionId;
	$this->mRevisionRecordObject = $oldRevisionRecordObject;
	$this->mRevisionTimestamp = $oldRevisionTimestamp;
	$this->mRevisionUser = $oldRevisionUser;
	$this->mRevisionSize = $oldRevisionSize;
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
764
/**
 * Record timing, resource-limit, profiling and cache metadata on a
 * ParserOutput as limit report data.
 *
 * @param ParserOptions $parserOptions Source of the configured limits
 * @param ParserOutput $parserOutput Receives the report entries
 */
public function makeLimitReport(
	ParserOptions $parserOptions, ParserOutput $parserOutput
) {
	$maxIncludeSize = $parserOptions->getMaxIncludeSize();

	// CPU time is only reported when it is available.
	$cpuTime = $parserOutput->getTimeProfile( 'cpu' );
	if ( $cpuTime !== null ) {
		$parserOutput->setLimitReportData( 'limitreport-cputime', sprintf( "%.3f", $cpuTime ) );
	}

	$parserOutput->setLimitReportData(
		'limitreport-walltime',
		sprintf( "%.3f", $parserOutput->getTimeProfile( 'wall' ) )
	);

	// Each entry is a [ used, allowed ] pair.
	$usageVersusLimit = [
		'limitreport-ppvisitednodes' =>
			[ $this->mPPNodeCount, $parserOptions->getMaxPPNodeCount() ],
		'limitreport-postexpandincludesize' =>
			[ $this->mIncludeSizes['post-expand'], $maxIncludeSize ],
		'limitreport-templateargumentsize' =>
			[ $this->mIncludeSizes['arg'], $maxIncludeSize ],
		'limitreport-expansiondepth' =>
			[ $this->mHighestExpansionDepth, $parserOptions->getMaxPPExpandDepth() ],
		'limitreport-expensivefunctioncount' =>
			[ $this->mExpensiveFunctionCount, $parserOptions->getExpensiveParserFunctionLimit() ],
	];
	foreach ( $usageVersusLimit as $reportKey => $pair ) {
		$parserOutput->setLimitReportData( $reportKey, $pair );
	}

	foreach ( $this->mStripState->getLimitReport() as [ $key, $value ] ) {
		$parserOutput->setLimitReportData( $key, $value );
	}

	$this->hookRunner->onParserLimitReportPrepare( $this, $parserOutput );

	// Add on template profiling data in human/machine readable way:
	// the ten entries with the largest 'real' time, slowest first.
	$dataByFunc = $this->mProfiler->getFunctionStats();
	uasort( $dataByFunc, static fn ( $a, $b ) => $b['real'] <=> $a['real'] );
	$profileReport = [];
	foreach ( array_slice( $dataByFunc, 0, 10 ) as $entry ) {
		$profileReport[] = sprintf(
			"%6.2f%% %8.3f %6d %s",
			$entry['%real'],
			$entry['real'],
			$entry['calls'],
			htmlspecialchars( $entry['name'] )
		);
	}
	$parserOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );

	// Add other cache related metadata
	if ( $this->svcOptions->get( MainConfigNames::ShowHostnames ) ) {
		$parserOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
	}
	$parserOutput->setLimitReportData( 'cachereport-timestamp', $parserOutput->getCacheTime() );
	$parserOutput->setLimitReportData( 'cachereport-ttl', $parserOutput->getCacheExpiry() );
	$parserOutput->setLimitReportData( 'cachereport-transientcontent', $parserOutput->hasReducedExpiry() );
}
833
/**
 * Run the first parsing stage (internalParse) on a fragment, in non-main
 * mode, and return the half-parsed result.
 *
 * @param string $text
 * @param PPFrame|false $frame Frame used for variable expansion, if any
 * @return string
 */
public function recursiveTagParse( $text, $frame = false ) {
	return $this->internalParse( $text, false, $frame );
}
863
/**
 * Like recursiveTagParse(), but additionally runs the second stage
 * (internalParseHalfParsed, non-main mode) on the result.
 *
 * @param string $text
 * @param PPFrame|false $frame Frame used for variable expansion, if any
 * @return string
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );
	return $this->internalParseHalfParsed( $halfParsed, false );
}
888
/**
 * Parse extension tag content as if it were a top-level document: first
 * stage, then the ParserAfterParse hook, then the second stage in main
 * mode.
 *
 * @param string $text
 * @return string
 */
public function parseExtensionTagAsTopLevelDoc( $text ) {
	$text = $this->recursiveTagParse( $text );
	// $text stays a named variable so the hook can receive it.
	$this->hookRunner->onParserAfterParse( $this, $text, $this->mStripState );
	return $this->internalParseHalfParsed( $text, true );
}
914
/**
 * Expand variables in wikitext (OT_PREPROCESS mode) and restore all strip
 * markers, without producing HTML.
 *
 * @param string $text
 * @param PageReference|null $page
 * @param ParserOptions $options
 * @param int|null $revid Revision ID to assume, if not null
 * @param PPFrame|false $frame Frame handed to replaceVariables()
 * @return string
 */
public function preprocess(
	$text,
	?PageReference $page,
	ParserOptions $options,
	$revid = null,
	$frame = false
) {
	// Held only for its scope: the lock is tied to this variable's lifetime.
	$magicScopeVariable = $this->lock();
	$this->startParse( $page, $options, self::OT_PREPROCESS, true );
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}
	$this->hookRunner->onParserBeforePreprocess( $this, $text, $this->mStripState );
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
944
/**
 * Expand variables in a fragment and restore all strip markers, without
 * locking the parser or starting a new parse.
 *
 * @param string $text
 * @param PPFrame|false $frame Frame handed to replaceVariables()
 * @return string
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
959
/**
 * Process wikitext for use as preload text.
 *
 * Message parameters are substituted first, then the text is preprocessed
 * for inclusion in OT_PLAIN mode and expanded with the frame flags below;
 * strip markers are restored before returning.
 *
 * NOTE(review): the `$flags` assignment was missing from the recovered
 * source (the variable was used but never defined). It has been restored
 * to match upstream MediaWiki; confirm against the canonical file.
 *
 * @param string $text
 * @param PageReference $page
 * @param ParserOptions $options
 * @param array $params Message parameters substituted into $text
 * @return string
 */
public function getPreloadText( $text, PageReference $page, ParserOptions $options, $params = [] ) {
	$msg = new RawMessage( $text );
	$text = $msg->params( $params )->plain();

	# Parser (re)initialisation
	// Held only for its scope: the lock is tied to this variable's lifetime.
	$magicScopeVariable = $this->lock();
	$this->startParse( $page, $options, self::OT_PLAIN, true );

	$flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
	$dom = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
	return $this->mStripState->unstripBoth( $text );
}
988
/**
 * Set the user that getUserIdentity() should report. Passing null makes
 * getUserIdentity() fall back to the user from the parser options.
 *
 * @param UserIdentity|null $user
 */
public function setUser( ?UserIdentity $user ) {
	$this->mUser = $user;
}
999
/**
 * Set the context title. Thin wrapper that delegates to setPage().
 *
 * @param Title|null $t
 */
public function setTitle( ?Title $t = null ) {
	$this->setPage( $t );
}
1010
/**
 * @return Title The title currently stored on the parser
 */
public function getTitle(): Title {
	return $this->mTitle;
}
1019
/**
 * Set the page used as context for parsing.
 *
 * A missing page is replaced by the special 'Badtitle/Parser' title, and
 * any fragment on the resulting title is stripped before it is stored.
 *
 * @param PageReference|null $t
 */
public function setPage( ?PageReference $t = null ) {
	// For now (early 1.37 alpha), always convert to Title, so we don't have to do it over
	// and over again in other methods. Eventually, we will no longer need to have a Title
	// instance internally.
	$title = $t
		? Title::newFromPageReference( $t )
		: Title::makeTitle( NS_SPECIAL, 'Badtitle/Parser' );

	# Strip the fragment to avoid various odd effects
	$this->mTitle = $title->hasFragment()
		? $title->createFragmentTarget( '' )
		: $title;
}
1043
/**
 * Return the page used as context for parsing.
 *
 * If the stored title is still the 'Badtitle/Missing' placeholder, a
 * deprecation warning is emitted and null is returned.
 *
 * @return PageReference|null
 */
public function getPage(): ?PageReference {
	if ( !$this->mTitle->isSpecial( 'Badtitle' ) ) {
		return $this->mTitle;
	}

	[ , $subPage ] = $this->specialPageFactory->resolveAlias( $this->mTitle->getDBkey() );
	if ( $subPage !== 'Missing' ) {
		return $this->mTitle;
	}

	wfDeprecated( __METHOD__ . ' without a Title set', '1.34' );
	return null;
}
1061
/**
 * @return int The current output type (one of the self::OT_* constants
 *   when set through this class)
 */
public function getOutputType(): int {
	return $this->mOutputType;
}
1070
/**
 * Set the output type and rebuild the `ot` shortcut array, which holds
 * one boolean per mode ('html', 'wiki', 'pre', 'plain').
 *
 * @param int $ot One of the self::OT_* constants
 */
public function setOutputType( $ot ): void {
	$this->mOutputType = $ot;

	# Shortcut alias
	$modes = [
		'html' => self::OT_HTML,
		'wiki' => self::OT_WIKI,
		'pre' => self::OT_PREPROCESS,
		'plain' => self::OT_PLAIN,
	];
	$shortcut = [];
	foreach ( $modes as $label => $type ) {
		$shortcut[$label] = ( $ot == $type );
	}
	$this->ot = $shortcut;
}
1086
/**
 * Deprecated combined accessor/mutator for the output type; delegates to
 * wfSetVar() on the stored value.
 *
 * @deprecated since 1.35
 * @param int|null $x Value handed to wfSetVar()
 * @return int Whatever wfSetVar() returns
 */
public function OutputType( $x = null ) {
	wfDeprecated( __METHOD__, '1.35' );
	$result = wfSetVar( $this->mOutputType, $x );
	return $result;
}
1098
/**
 * Return the ParserOutput being built.
 *
 * Calling this before an output object exists is deprecated (since 1.42)
 * and yields null.
 *
 * @return ParserOutput
 */
public function getOutput() {
	// @phan-suppress-next-line PhanRedundantCondition False positive, see https://github.com/phan/phan/issues/4720
	if ( isset( $this->mOutput ) ) {
		return $this->mOutput;
	}

	wfDeprecated( __METHOD__ . ' before initialization', '1.42' );
	// @phan-suppress-next-line PhanTypeMismatchReturnProbablyReal We don’t want to tell anyone we’re doing this
	return null;
}
1112
/**
 * @return ParserOptions|null The options currently stored on the parser
 *   (null if none have been set yet)
 */
public function getOptions() {
	return $this->mOptions;
}
1120
/**
 * Store the ParserOptions to use.
 *
 * @param ParserOptions $options
 */
public function setOptions( ParserOptions $options ): void {
	$this->mOptions = $options;
}
1129
/**
 * Deprecated combined accessor/mutator for the parser options; delegates
 * to wfSetVar() on the stored value.
 *
 * @deprecated since 1.35
 * @param ParserOptions|null $x Value handed to wfSetVar()
 * @return ParserOptions Whatever wfSetVar() returns
 */
public function Options( $x = null ) {
	wfDeprecated( __METHOD__, '1.35' );
	$result = wfSetVar( $this->mOptions, $x );
	return $result;
}
1141
/**
 * Hand out the current link ID and advance the counter by one.
 *
 * @return int The ID before incrementing
 */
public function nextLinkID() {
	$current = $this->mLinkID;
	$this->mLinkID++;
	return $current;
}
1149
/**
 * Overwrite the link ID counter.
 *
 * @param int $id The value the next call to nextLinkID() will return
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
1157
/**
 * Deprecated alias of getTargetLanguage().
 *
 * @deprecated since 1.40
 * @return Language
 */
public function getFunctionLang() {
	wfDeprecated( __METHOD__, '1.40' );
	$language = $this->getTargetLanguage();
	return $language;
}
1168
/**
 * Determine the language the output is being produced in.
 *
 * Precedence: an explicit target language in the parser options, then the
 * user language when parsing an interface message, then the page language
 * of the current title.
 *
 * @return Language
 */
public function getTargetLanguage() {
	$explicit = $this->mOptions->getTargetLanguage();
	if ( $explicit !== null ) {
		return $explicit;
	}

	if ( $this->mOptions->getInterfaceMessage() ) {
		return $this->mOptions->getUserLangObj();
	}

	return $this->getTitle()->getPageLanguage();
}
1188
/**
 * Return the user set via setUser(), falling back to the user identity
 * from the parser options when none is set.
 *
 * @return UserIdentity
 */
public function getUserIdentity(): UserIdentity {
	// `??` rather than a direct read: it tolerates the property not being
	// initialised yet.
	$override = $this->mUser ?? null;
	if ( $override !== null ) {
		return $override;
	}
	return $this->getOptions()->getUserIdentity();
}
1199
/**
 * @return Preprocessor The preprocessor owned by this parser
 */
public function getPreprocessor() {
	return $this->mPreprocessor;
}
1209
/**
 * Return the parser's LinkRenderer, creating it through the factory on
 * first use.
 *
 * @return LinkRenderer
 */
public function getLinkRenderer() {
	// XXX We make the LinkRenderer with current options and then cache it forever
	$this->mLinkRenderer ??= $this->linkRendererFactory->create();

	return $this->mLinkRenderer;
}
1224
/**
 * @return MagicWordFactory The factory injected at construction
 */
public function getMagicWordFactory() {
	return $this->magicWordFactory;
}
1234
/**
 * @return Language The content language injected at construction
 */
public function getContentLanguage() {
	return $this->contLang;
}
1244
/**
 * @return BadFileLookup The lookup service injected at construction
 */
public function getBadFileLookup() {
	return $this->badFileLookup;
}
1254
/**
 * Replace every occurrence of the given elements (and every HTML comment)
 * in $text with a unique marker, collecting what was removed.
 *
 * Each entry of $matches is keyed by its marker and holds:
 *   [ element name, inner content (null for self-closing tags),
 *     decoded attribute array, full original text of the element ]
 *
 * Element names are interpolated into the regular expressions as-is.
 * An element without an end tag runs to the end of the text.
 *
 * @param string[] $elements Element names to extract
 * @param string $text Source text
 * @param array[] &$matches Out parameter, reset and then filled as described
 * @return string $text with the extracted parts replaced by markers
 */
public static function extractTagsAndParams( array $elements, $text, &$matches ) {
	// Shared across calls so that markers never repeat within a request.
	static $n = 1;
	$stripped = '';
	$matches = [];

	$taglist = implode( '|', $elements );
	$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";

	while ( $text != '' ) {
		$pieces = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$stripped .= $pieces[0];
		$pieceCount = count( $pieces );
		if ( $pieceCount < 5 ) {
			// Nothing (more) to extract.
			break;
		}

		if ( $pieceCount > 5 ) {
			# comment
			$element = $pieces[4];
			$attributes = '';
			$close = '';
			$inside = $pieces[5];
		} else {
			# tag
			[ , $element, $attributes, $close, $inside ] = $pieces;
		}

		$marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
		$stripped .= $marker;

		if ( $close !== '/>' ) {
			$end = ( $element === '!--' )
				? '/(-->)/'
				: "/(<\\/$element\\s*>)/i";
			$remainder = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $remainder[0];
			if ( count( $remainder ) >= 3 ) {
				[ , $tail, $text ] = $remainder;
			} else {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			}
		} else {
			# Empty element tag, <tag />
			$content = null;
			$text = $inside;
			$tail = null;
		}

		$matches[$marker] = [
			$element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail",
		];
	}

	return $stripped;
}
1331
/**
 * @return array The parser's strip list
 */
public function getStripList() {
	return $this->mStripList;
}
1340
/**
 * @return StripState The strip state of the current parse
 */
public function getStripState() {
	return $this->mStripState;
}
1348
/**
 * Register $text in the general strip state under a newly generated
 * marker and return that marker.
 *
 * @param string $text
 * @return string The marker standing in for $text
 */
public function insertStripItem( $text ) {
	$index = $this->mMarkerIndex++;
	$marker = self::MARKER_PREFIX . "-item-{$index}-" . self::MARKER_SUFFIX;
	$this->mStripState->addGeneral( $marker, $text );
	return $marker;
}
1364
/**
 * Convert wikitext table markup in $text to HTML table tags.
 *
 * The text is processed line by line. A line matching "{|" (optionally
 * preceded by ":" characters) opens a table, "|}" closes one, "|-" starts
 * a row, "|+" a caption, "|" a data cell and "!" a header cell. Lines
 * outside any table are copied through unchanged.
 *
 * State is kept in parallel stacks with one entry per currently open
 * table. Note that $indent_level is a single value, not a stack.
 *
 * @param string $text
 * @return string The text with table markup replaced by HTML
 */
1371 private function handleTables( $text ) {
1372 $lines = StringUtils::explode( "\n", $text );
1373 $out = '';
1374 $td_history = []; # Is currently a td tag open?
1375 $last_tag_history = []; # Save history of last tag activated (td, th or caption)
1376 $tr_history = []; # Is currently a tr tag open?
1377 $tr_attributes = []; # history of tr attributes
1378 $has_opened_tr = []; # Did this table open a <tr> element?
1379 $indent_level = 0; # indent level of the table
1380
1381 foreach ( $lines as $outLine ) {
1382 $line = trim( $outLine );
1383
1384 if ( $line === '' ) { # empty line, go to next line
1385 $out .= $outLine . "\n";
1386 continue;
1387 }
1388
1389 $first_character = $line[0];
1390 $first_two = substr( $line, 0, 2 );
1391 $matches = [];
1392
1393 if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
1394 # First check if we are starting a new table
1395 $indent_level = strlen( $matches[1] );
1396
1397 $attributes = $this->mStripState->unstripBoth( $matches[2] );
1398 $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );
1399
# Each leading ":" becomes one level of <dl><dd> wrapping.
1400 $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
# Push a fresh entry on every stack for the new table.
1401 $td_history[] = false;
1402 $last_tag_history[] = '';
1403 $tr_history[] = false;
1404 $tr_attributes[] = '';
1405 $has_opened_tr[] = false;
1406 } elseif ( count( $td_history ) == 0 ) {
1407 # Don't do any of the following
# (no table is open, so the line is passed through untouched)
1408 $out .= $outLine . "\n";
1409 continue;
1410 } elseif ( $first_two === '|}' ) {
1411 # We are ending a table
1412 $line = '</table>' . substr( $line, 2 );
1413 $last_tag = array_pop( $last_tag_history );
1414
# A table that never opened a row gets a single empty row and cell.
1415 if ( !array_pop( $has_opened_tr ) ) {
1416 $line = "<tr><td></td></tr>{$line}";
1417 }
1418
1419 if ( array_pop( $tr_history ) ) {
1420 $line = "</tr>{$line}";
1421 }
1422
1423 if ( array_pop( $td_history ) ) {
1424 $line = "</{$last_tag}>{$line}";
1425 }
1426 array_pop( $tr_attributes );
1427 if ( $indent_level > 0 ) {
1428 $outLine = rtrim( $line ) . str_repeat( '</dd></dl>', $indent_level );
1429 } else {
1430 $outLine = $line;
1431 }
1432 } elseif ( $first_two === '|-' ) {
1433 # Now we have a table row
1434 $line = preg_replace( '#^\|-+#', '', $line );
1435
1436 # What's after the tag is now only attributes
1437 $attributes = $this->mStripState->unstripBoth( $line );
1438 $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
# Stash the attributes; the <tr> itself is emitted when the first
# cell of the row is seen.
1439 array_pop( $tr_attributes );
1440 $tr_attributes[] = $attributes;
1441
1442 $line = '';
1443 $last_tag = array_pop( $last_tag_history );
1444 array_pop( $has_opened_tr );
1445 $has_opened_tr[] = true;
1446
1447 if ( array_pop( $tr_history ) ) {
1448 $line = '</tr>';
1449 }
1450
1451 if ( array_pop( $td_history ) ) {
1452 $line = "</{$last_tag}>{$line}";
1453 }
1454
1455 $outLine = $line;
1456 $tr_history[] = false;
1457 $td_history[] = false;
1458 $last_tag_history[] = '';
1459 } elseif ( $first_character === '|'
1460 || $first_character === '!'
1461 || $first_two === '|+'
1462 ) {
1463 # This might be cell elements, td, th or captions
1464 if ( $first_two === '|+' ) {
1465 $first_character = '+';
1466 $line = substr( $line, 2 );
1467 } else {
1468 $line = substr( $line, 1 );
1469 }
1470
1471 // Implies both are valid for table headings.
1472 if ( $first_character === '!' ) {
1473 $line = StringUtils::replaceMarkup( '!!', '||', $line );
1474 }
1475
1476 # Split up multiple cells on the same line.
1477 # FIXME : This can result in improper nesting of tags processed
1478 # by earlier parser steps.
1479 $cells = explode( '||', $line );
1480
1481 $outLine = '';
1482
1483 # Loop through each table cell
1484 foreach ( $cells as $cell ) {
1485 $previous = '';
# Captions do not open a row; td and th cells do.
1486 if ( $first_character !== '+' ) {
1487 $tr_after = array_pop( $tr_attributes );
1488 if ( !array_pop( $tr_history ) ) {
1489 $previous = "<tr{$tr_after}>\n";
1490 }
1491 $tr_history[] = true;
1492 $tr_attributes[] = '';
1493 array_pop( $has_opened_tr );
1494 $has_opened_tr[] = true;
1495 }
1496
1497 $last_tag = array_pop( $last_tag_history );
1498
# Close the previously open cell, if any, before opening this one.
1499 if ( array_pop( $td_history ) ) {
1500 $previous = "</{$last_tag}>\n{$previous}";
1501 }
1502
1503 if ( $first_character === '|' ) {
1504 $last_tag = 'td';
1505 } elseif ( $first_character === '!' ) {
1506 $last_tag = 'th';
1507 } elseif ( $first_character === '+' ) {
1508 $last_tag = 'caption';
1509 } else {
1510 $last_tag = '';
1511 }
1512
1513 $last_tag_history[] = $last_tag;
1514
1515 # A cell could contain both parameters and data
1516 $cell_data = explode( '|', $cell, 2 );
1517
1518 # T2553: Note that a '|' inside an invalid link should not
1519 # be mistaken as delimiting cell parameters
1520 # Bug T153140: Neither should language converter markup.
1521 if ( preg_match( '/\[\[|-\{/', $cell_data[0] ) === 1 ) {
1522 $cell = "{$previous}<{$last_tag}>" . trim( $cell );
1523 } elseif ( count( $cell_data ) == 1 ) {
1524 // Whitespace in cells is trimmed
1525 $cell = "{$previous}<{$last_tag}>" . trim( $cell_data[0] );
1526 } else {
1527 $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
1528 $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
1529 // Whitespace in cells is trimmed
1530 $cell = "{$previous}<{$last_tag}{$attributes}>" . trim( $cell_data[1] );
1531 }
1532
1533 $outLine .= $cell;
1534 $td_history[] = true;
1535 }
1536 }
1537 $out .= $outLine . "\n";
1538 }
1539
1540 # Closing open td, tr && table
# (any open cell is closed as </td> here, whatever its actual tag was)
1541 while ( count( $td_history ) > 0 ) {
1542 if ( array_pop( $td_history ) ) {
1543 $out .= "</td>\n";
1544 }
1545 if ( array_pop( $tr_history ) ) {
1546 $out .= "</tr>\n";
1547 }
1548 if ( !array_pop( $has_opened_tr ) ) {
1549 $out .= "<tr><td></td></tr>\n";
1550 }
1551
1552 $out .= "</table>\n";
1553 }
1554
1555 # Remove trailing line-ending (b/c)
1556 if ( substr( $out, -1 ) === "\n" ) {
1557 $out = substr( $out, 0, -1 );
1558 }
1559
1560 # special case: don't return empty table
1561 if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
1562 $out = '';
1563 }
1564
1565 return $out;
1566 }
1567
/**
 * First parsing stage: expand variables/templates, sanitise HTML, then
 * handle tables, horizontal rules, behaviour switches, headings, internal
 * links, quotes, external links and magic links, in that order.
 *
 * The ParserBeforeInternalParse hook can abort the stage, in which case
 * the text is returned as the hook left it.
 *
 * @param string $text
 * @param bool $isMain Forwarded to finalizeHeadings()
 * @param PPFrame|false $frame When given, the text is preprocessed to a DOM
 *   and expanded through this frame instead of using replaceVariables()
 * @return string Half-parsed text
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
	$origText = $text;

	# Hook to suspend the parser in this state
	$proceed = $this->hookRunner->onParserBeforeInternalParse( $this, $text, $this->mStripState );
	if ( !$proceed ) {
		return $text;
	}

	if ( $frame ) {
		# if $frame is provided, then use $frame for replacing any variables.
		# Use frame depth to infer how include/noinclude tags should be handled:
		# depth=0 means this is the top-level document; otherwise it's an included document
		$flag = $frame->depth ? Preprocessor::DOM_FOR_INCLUSION : 0;
		$text = $frame->expand( $this->preprocessToDom( $text, $flag ) );
	} else {
		# if $frame is not provided, then use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	}

	// Callback from the Sanitizer for expanding items found in
	// HTML attribute values, so they can be safely tested and escaped.
	$expandAttributeValue = function ( &$text, $frame = false ) {
		$text = $this->replaceVariables( $text, $frame );
		$text = $this->mStripState->unstripBoth( $text );
	};
	$text = Sanitizer::internalRemoveHtmlTags( $text, $expandAttributeValue, false, [], [] );
	$this->hookRunner->onInternalParseBeforeLinks( $this, $text, $this->mStripState );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->handleTables( $text );

	// Four or more dashes at the start of a line become a horizontal rule.
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->handleDoubleUnderscore( $text );

	$text = $this->handleHeadings( $text );
	$text = $this->handleInternalLinks( $text );
	$text = $this->handleAllQuotes( $text );
	$text = $this->handleExternalLinks( $text );

	# handleInternalLinks may sometimes leave behind absolute URLs, which
	# have to be masked to hide them from handleExternalLinks; the masking
	# marker is removed again here.
	$text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );

	$text = $this->handleMagicLinks( $text );

	return $this->finalizeHeadings( $text, $origText, $isMain );
}
1643
1651 return $this->languageConverterFactory->getLanguageConverter(
1652 $this->getTargetLanguage()
1653 );
1654 }
1655
/**
 * Shorthand for fetching the language converter that belongs to the
 * language returned by getContentLanguage().
 *
 * @return ILanguageConverter
 */
private function getContentLanguageConverter(): ILanguageConverter {
	$contentLanguage = $this->getContentLanguage();

	return $this->languageConverterFactory->getLanguageConverter( $contentLanguage );
}
1664
/**
 * Accessor for the hook container held by this parser instance.
 *
 * @return mixed The value of $this->hookContainer
 */
protected function getHookContainer() {
	return $this->hookContainer;
}
1675
/**
 * Accessor for the hook runner held by this parser instance.
 *
 * @return mixed The value of $this->hookRunner
 */
protected function getHookRunner() {
	return $this->hookRunner;
}
1687
/**
 * Finish a half-parsed text: unstrip general markers, run the block-level
 * pass, replace link holders, apply language conversion (unless it is
 * suppressed), localize the TOC, unstrip the remaining markers and tidy.
 *
 * @param string $text Half-parsed text
 * @param bool $isMain When true, the ParserAfterTidy hook is run
 * @param bool $linestart Passed through to BlockLevelPass::doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	$text = $this->mStripState->unstripGeneral( $text );
	$text = BlockLevelPass::doBlockLevels( $text, $linestart );
	$this->replaceLinkHoldersPrivate( $text );

	// Conversion is skipped when it is disabled in the options, when the
	// 'nocontentconvert' double underscore is set, or for interface messages.
	$convertContent = !$this->mOptions->getDisableContentConversion()
		&& !isset( $this->mDoubleUnderscores['nocontentconvert'] )
		&& !$this->mOptions->getInterfaceMessage();

	$converter = null;
	if ( $convertContent ) {
		# The position of the convert() call should not be changed: it
		# assumes that the links are all replaced and the only thing left
		# is the <nowiki> mark.
		$converter = $this->getTargetLanguageConverter();
		$text = $converter->convert( $text );
	}

	// Convert the TOC *after* the main text so that all the editor-defined
	// conversion rules (by convention defined at the start of the article)
	// are applied to it. A null converter suppresses conversion.
	self::localizeTOC(
		$this->mOutput->getTOCData(),
		$this->getTargetLanguage(),
		$converter
	);

	$outputLanguage = $converter
		? new Bcp47CodeValue( LanguageCode::bcp47( $converter->getPreferredVariant() ) )
		: $this->getTargetLanguage();
	$this->mOutput->setLanguage( $outputLanguage );

	$text = $this->mStripState->unstripNoWiki( $text );
	$text = $this->mStripState->unstripGeneral( $text );
	$text = $this->tidy->tidy( $text, [ Sanitizer::class, 'armorFrenchSpaces' ] );

	if ( $isMain ) {
		$this->hookRunner->onParserAfterTidy( $this, $text );
	}

	return $text;
}
1751
/**
 * Find free external links and the magic words "RFC n", "PMID n" and
 * "ISBN n" in $text and hand each match to magicLinkCallback().
 * Existing anchors and other HTML elements are matched as well so that
 * their contents are skipped.
 *
 * @param string $text
 * @return string
 */
private function handleMagicLinks( $text ) {
	$prots = $this->urlUtils->validAbsoluteProtocols();
	$urlChar = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	# a dash or a non-newline space
	$spdash = '(?:-|' . self::SPACE_NOT_NL . ')';
	# possessive match of 1 or more non-newline spaces
	$spaces = self::SPACE_NOT_NL . '++';

	$pattern = '!(?:                        # Start cases
		(<a[ \t\r\n>].*?</a>) |             # m[1]: Skip link text
		(<.*?>) |                           # m[2]: Skip stuff inside HTML elements' . "
		(\b                                 # m[3]: Free external links
			(?i:$prots)
			($addr$urlChar*)                # m[4]: Post-protocol path
		) |
		\b(?:RFC|PMID) $spaces              # m[5]: RFC or PMID, capture number
			([0-9]+)\b |
		\bISBN $spaces (                    # m[6]: ISBN, capture number
			(?: 97[89] $spdash? )?          #  optional 13-digit ISBN prefix
			(?: [0-9]  $spdash? ){9}        #  9 digits with opt. delimiters
			[0-9Xx]                         #  check digit
		)\b
	)!xu";

	return preg_replace_callback(
		$pattern,
		fn ( array $m ) => $this->magicLinkCallback( $m ),
		$text
	);
}
1790
/**
 * Callback for handleMagicLinks(): turn one regex match into its output.
 *
 * Group layout: 1 = existing anchor, 2 = other HTML element, 3/4 = free
 * external link and its post-protocol part, 5 = RFC/PMID number,
 * 6 = ISBN number.
 *
 * @param array $m Match groups from the handleMagicLinks() pattern
 * @return string HTML for the link, or the matched text unchanged
 * @throws UnexpectedValueException If group 5 matched but the text starts
 *   with neither "RFC" nor "PMID"
 */
private function magicLinkCallback( array $m ) {
	// True when the given capture group took part in the match.
	$matched = static fn ( int $group ): bool => ( $m[$group] ?? '' ) !== '';

	if ( $matched( 1 ) || $matched( 2 ) ) {
		# Skip anchors and other HTML elements
		return $m[0];
	}

	if ( $matched( 3 ) ) {
		# Free external link
		return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
	}

	if ( $matched( 5 ) ) {
		# RFC or PMID
		if ( strncmp( $m[0], 'RFC', 3 ) === 0 ) {
			if ( !$this->mOptions->getMagicRFCLinks() ) {
				return $m[0];
			}
			[ $keyword, $urlmsg, $cssClass, $trackingCat ] =
				[ 'RFC', 'rfcurl', 'mw-magiclink-rfc', 'magiclink-tracking-rfc' ];
		} elseif ( strncmp( $m[0], 'PMID', 4 ) === 0 ) {
			if ( !$this->mOptions->getMagicPMIDLinks() ) {
				return $m[0];
			}
			[ $keyword, $urlmsg, $cssClass, $trackingCat ] =
				[ 'PMID', 'pubmedurl', 'mw-magiclink-pmid', 'magiclink-tracking-pmid' ];
		} else {
			// Should never happen
			throw new UnexpectedValueException( __METHOD__ . ': unrecognised match type "' .
				substr( $m[0], 0, 20 ) . '"' );
		}
		$id = $m[5];
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		$this->addTrackingCategory( $trackingCat );
		return $this->getLinkRenderer()->makeExternalLink(
			$url,
			"{$keyword} {$id}",
			$this->getTitle(),
			$cssClass,
			[]
		);
	}

	if ( $matched( 6 ) && $this->mOptions->getMagicISBNLinks() ) {
		# ISBN: show it with plain spaces, link to it with delimiters removed
		$isbn = preg_replace( '/' . self::SPACE_NOT_NL . '/', ' ', $m[6] );
		$num = str_replace( [ '-', ' ', 'x' ], [ '', '', 'X' ], $isbn );
		$this->addTrackingCategory( 'magiclink-tracking-isbn' );
		return $this->getLinkRenderer()->makeKnownLink(
			SpecialPage::getTitleFor( 'Booksources', $num ),
			"ISBN $isbn",
			[
				'class' => 'internal mw-magiclink-isbn',
				'title' => false // suppress title attribute
			]
		);
	}

	return $m[0];
}
1864
/**
 * Build the output for a free (unbracketed) external link.
 *
 * Anything from the first &lt;/&gt;/&nbsp; entity onwards, and trailing
 * punctuation, is split off into a trail that is appended after the link.
 *
 * @param string $url The matched URL text
 * @param int $numPostProto Length of the part of the URL after the protocol
 * @return string HTML (link or external image) followed by the trail, or
 *   the original text when nothing but the protocol would remain
 */
private function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# The characters '<' and '>' (which were escaped by
	# internalRemoveHtmlTags()) should not be included in
	# URLs, per RFC 2396.
	# Make &nbsp; terminate a URL as well (bug T84937)
	$entityMatch = [];
	$hasTerminator = preg_match(
		'/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
		$url,
		$entityMatch,
		PREG_OFFSET_CAPTURE
	);
	if ( $hasTerminator ) {
		$cut = $entityMatch[0][1];
		$trail = substr( $url, $cut ) . $trail;
		$url = substr( $url, 0, $cut );
	}

	# Trailing punctuation goes to $trail. Without a left bracket in the
	# URL, right brackets are considered fair game too.
	$sep = strpos( $url, '(' ) === false ? ',;\.:!?)' : ',;\.:!?';

	$urlRev = strrev( $url );
	$numSepChars = strspn( $urlRev, $sep );
	# Don't break a trailing HTML entity by moving the ; into $trail.
	# This is hot code: substr_compare avoids creating a new string for the
	# comparison, and matching on the reversed string from a fixed offset
	# avoids a slow $-anchored preg_match.
	# un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
	$entityParts = [];
	if ( $numSepChars
		&& substr_compare( $url, ";", -$numSepChars, 1 ) === 0
		&& preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $entityParts, 0, $numSepChars )
	) {
		$numSepChars--;
	}
	if ( $numSepChars ) {
		$trail = substr( $url, -$numSepChars ) . $trail;
		$url = substr( $url, 0, -$numSepChars );
	}

	# Verify that we still have a real URL after trail removal, and
	# not just lone protocol
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# Is this an external image?
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html !== false ) {
		return $html . $trail;
	}

	# Not an image, make a link
	$html = $this->getLinkRenderer()->makeExternalLink(
		$url,
		$this->getTargetLanguageConverter()->markNoConversion( $url ),
		$this->getTitle(),
		'free',
		$this->getExternalLinkAttribs( $url )
	);
	# Register it in the output object...
	$this->mOutput->addExternalLink( $url );

	return $html . $trail;
}
1942
/**
 * Convert "== Heading ==" style lines into <h1>..<h6> elements.
 * Levels are processed from 6 down to 1 so that longer runs of '='
 * are consumed before shorter ones.
 *
 * @param string $text
 * @return string
 */
private function handleHeadings( $text ) {
	foreach ( range( 6, 1 ) as $level ) {
		$marker = str_repeat( '=', $level );
		// Trim non-newline whitespace from headings.
		// Using \s* will break for: "==\n===\n" and parse as <h2>=</h2>
		$pattern = '/^(?:' . $marker . ')[ \t]*(.+?)[ \t]*(?:' . $marker . ')\s*$/m';
		$replacement = '<h' . $level . '>\1</h' . $level . '>';
		$text = preg_replace( $pattern, $replacement, $text );
	}

	return $text;
}
1958
/**
 * Apply doQuotes() to every line of $text separately and join the
 * results back together with newlines.
 *
 * @param string $text
 * @return string
 */
private function handleAllQuotes( $text ) {
	$converted = [];
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes( $line );
	}

	return implode( "\n", $converted );
}
1975
/**
 * Convert runs of apostrophes in a single line into <i>/<b> markup.
 *
 * Two apostrophes toggle italics, three toggle bold and five toggle both.
 * Runs of four, or of more than five, have their surplus leading
 * apostrophes treated as literal text. When the line contains an odd
 * number of both bold and italic markers, one bold marker is reinterpreted
 * as an apostrophe followed by an italic marker. Tags still open at the
 * end of the line are closed.
 *
 * @param string $text One line of text
 * @return string
 */
public function doQuotes( $text ) {
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		// No apostrophe runs at all
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	// Odd indexes of $arr hold the apostrophe runs, even indexes the text.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (T15227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				// Last and second-to-last character of the preceding text
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} elseif ( $firstmultiletterword == -1 ) {
					$firstmultiletterword = $i;
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that has neither.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state is one of '', 'i', 'b', 'bi', 'ib' (open tags, outermost first)
	// or 'both' (five apostrophes seen; the nesting order is not yet known,
	// so text is collected in $buffer until the next marker decides it).
	$output = '';
	$buffer = '';
	$state = '';
	$i = 0;
	foreach ( $arr as $r ) {
		if ( ( $i % 2 ) == 0 ) {
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			$thislen = strlen( $r );
			if ( $thislen == 2 ) {
				// two quotes - open or close italics
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen == 3 ) {
				// three quotes - open or close bold
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen == 5 ) {
				// five quotes - open or close both separately
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		$i++;
	}
	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: a plain truthiness test would treat
	// the buffered text "0" as empty and silently drop it from the output.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
2159
/**
 * Replace bracketed external links ("[url text]") with HTML links and
 * register each URL in the parser output.
 *
 * @param string $text
 * @return string
 * @throws RuntimeException If the split on the bracketed-link regex fails
 */
private function handleExternalLinks( $text ) {
	$bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	if ( $bits === false ) {
		throw new RuntimeException( "PCRE failure" );
	}
	// Everything before the first link
	$s = array_shift( $bits );

	// The remaining bits come in groups of four:
	// URL, protocol (unused here), link text, trailing text.
	$numBits = count( $bits );
	for ( $i = 0; $i < $numBits; $i += 4 ) {
		$url = $bits[$i];
		$label = $bits[$i + 2];
		$trail = $bits[$i + 3];

		# The characters '<' and '>' (which were escaped by
		# internalRemoveHtmlTags()) should not be included in
		# URLs, per RFC 2396.
		$ltgt = [];
		if ( preg_match( '/&(lt|gt);/', $url, $ltgt, PREG_OFFSET_CAPTURE ) ) {
			$cut = $ltgt[0][1];
			$label = substr( $url, $cut ) . ' ' . $label;
			$url = substr( $url, 0, $cut );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeExternalImage( $label );
		if ( $img !== false ) {
			$label = $img;
		}

		$dtrail = '';
		# Link type, used for CSS
		$linktype = 'text';

		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber
			$label = '[' . $this->getTargetLanguage()->formatNum( ++$this->mAutonumber ) . ']';
			$linktype = 'autonumber';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			[ $dtrail, $trail ] = Linker::splitTrail( $trail );
		}

		// Excluding protocol-relative URLs may avoid many false positives.
		$protocolPattern = '/^(?:' . $this->urlUtils->validAbsoluteProtocols() . ')/';
		if ( preg_match( $protocolPattern, $label ) ) {
			$label = $this->getTargetLanguageConverter()->markNoConversion( $label );
		}

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$s .= $this->getLinkRenderer()->makeExternalLink(
			$url,
			// @phan-suppress-next-line SecurityCheck-XSS
			new HtmlArmor( $label ),
			$this->getTitle(),
			$linktype,
			$this->getExternalLinkAttribs( $url )
		) . $dtrail . $trail;

		# Register link in the output object.
		$this->mOutput->addExternalLink( $url );
	}

	// @phan-suppress-next-line PhanTypeMismatchReturnNullable False positive from array_shift
	return $s;
}
2243
/**
 * Get the rel attribute value for an external link.
 *
 * 'nofollow' is returned when the NoFollowLinks setting is enabled, the
 * title (if one is given) is not in a namespace listed in
 * NoFollowNsExceptions, and the URL does not match
 * NoFollowDomainExceptions.
 *
 * @param string|false $url URL of the link; cast to string for the domain check
 * @param object|null $title Optional title of the page containing the link
 *   (read via ->getNamespace()), used for the namespace exceptions
 * @return string|null 'nofollow', or null when no rel value applies
 */
public static function getExternalLinkRel( $url = false, $title = null ) {
	$mainConfig = MediaWikiServices::getInstance()->getMainConfig();
	$noFollowLinks = $mainConfig->get( MainConfigNames::NoFollowLinks );
	$noFollowNsExceptions = $mainConfig->get( MainConfigNames::NoFollowNsExceptions );
	$noFollowDomainExceptions = $mainConfig->get( MainConfigNames::NoFollowDomainExceptions );

	// Only consult the namespace exceptions when a title was given.
	// Looking up a `false` placeholder with a loose in_array() would match
	// namespace 0 (false == 0) and wrongly exempt every link that has no
	// title whenever namespace 0 is listed as an exception.
	$inExemptNamespace = $title
		&& in_array( $title->getNamespace(), $noFollowNsExceptions );

	if (
		$noFollowLinks && !$inExemptNamespace
		&& !wfGetUrlUtils()->matchesDomainList( (string)$url, $noFollowDomainExceptions )
	) {
		return 'nofollow';
	}
	return null;
}
2268
/**
 * Build the HTML attributes ('target' and/or 'rel') for an external link.
 *
 * @param string $url URL of the link, passed to getExternalLinkRel()
 * @return array Attribute name => value; empty when neither applies
 */
public function getExternalLinkAttribs( $url ) {
	$relTokens = [];
	$baseRel = self::getExternalLinkRel( $url, $this->getTitle() ) ?? '';
	if ( $baseRel !== '' ) {
		$relTokens[] = $baseRel;
	}

	$attribs = [];
	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
		$opensNewWindow = !in_array( $target, [ '_self', '_parent', '_top' ] );
		if ( $opensNewWindow ) {
			// T133507. New windows can navigate parent cross-origin.
			// Including noreferrer due to lacking browser
			// support of noopener. Eventually noreferrer should be removed.
			$relTokens[] = 'noreferrer noopener';
		}
	}

	if ( $relTokens ) {
		$attribs['rel'] = implode( ' ', $relTokens );
	}

	return $attribs;
}
2302
/**
 * Normalize the percent-encoding of a URL.
 *
 * Unsafe raw characters are percent-encoded; existing escapes are decoded
 * where the character is safe for the URL part they appear in, and
 * upper-cased otherwise. Dot segments are removed from the path, and the
 * brackets around a valid IPv6 host are restored afterwards.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# Test for RFC 3986 IPv6 syntax
	$scheme = '[a-z][a-z0-9+.-]*:';
	$userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
	$ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';
	$isIPv6 = false;
	$hostMatch = [];
	if ( preg_match(
		"<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $hostMatch
	) ) {
		$decodedHost = rawurldecode( $hostMatch[1] );
		if ( IPUtils::isValid( $decodedHost ) ) {
			$isIPv6 = $decodedHost;
		}
	}

	# Make sure unsafe characters are encoded
	$url = preg_replace_callback(
		'/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]+/',
		static fn ( $m ) => rawurlencode( $m[0] ),
		$url
	);

	// The result is assembled back to front; $end marks the start of the
	// part of $url that has already been consumed.
	$ret = '';
	$end = strlen( $url );

	# Fragment part ('fragment'), then query part ('query' minus &=+;).
	# Each entry maps the leading delimiter to the characters that must
	# stay escaped within that part.
	$trailingParts = [
		'#' => '"#%<>[\]^`{|}',
		'?' => '"#%<>[\]^`{|}&=+;',
	];
	foreach ( $trailingParts as $delimiter => $unsafe ) {
		$start = strpos( $url, $delimiter );
		if ( $start !== false && $start < $end ) {
			$part = substr( $url, $start, $end - $start );
			$ret = self::normalizeUrlComponent( $part, $unsafe ) . $ret;
			$end = $start;
		}
	}

	# Path part - 'pchar', remove dot segments
	# (find first '/' after the optional '//' after the scheme)
	$doubleSlash = strpos( $url, '//' );
	$start = strpos( $url, '/', $doubleSlash === false ? 0 : $doubleSlash + 2 );
	if ( $start !== false && $start < $end ) {
		$path = substr( $url, $start, $end - $start );
		$ret = UrlUtils::removeDotSegments(
			self::normalizeUrlComponent( $path, '"#%<>[\]^`{|}/?' )
		) . $ret;
		$end = $start;
	}

	# Scheme and host part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$ret = self::normalizeUrlComponent( substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;

	# Fix IPv6 syntax: the brackets were percent-encoded above
	if ( $isIPv6 !== false ) {
		$ipv6Host = "%5B({$isIPv6})%5D";
		$ret = preg_replace(
			"<^((?:{$scheme})?//(?:{$userinfo}@)?){$ipv6Host}(?=[:/?#]|$)>i",
			"$1[$2]",
			$ret
		);
	}

	return $ret;
}
2379
/**
 * Normalize the percent-escapes within one component of a URL.
 *
 * An escape is decoded when it stands for an ASCII character between
 * 33 and 126 that is not listed in $unsafe; every other escape is kept,
 * with its hex digits upper-cased.
 *
 * @param string $component
 * @param string $unsafe Characters that must remain escaped
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		static function ( $escape ) use ( $unsafe ) {
			$decoded = urldecode( $escape[0] );
			$code = ord( $decoded );
			$isPrintableAscii = $code > 32 && $code < 127;
			if ( $isPrintableAscii && strpos( $unsafe, $decoded ) === false ) {
				# Unescape it
				return $decoded;
			}
			# Leave it escaped, but use uppercase for a-f
			return strtoupper( $escape[0] );
		},
		$component
	);
}
2394
/**
 * Turn $url into an external image tag if it looks like an image URL and
 * external images are permitted for it: globally, by URL prefix, or
 * through the on-wiki image whitelist.
 *
 * @param string $url
 * @return string|false HTML for the image, or false when no image was made
 */
private function maybeMakeExternalImage( $url ) {
	$imagesfrom = $this->mOptions->getAllowExternalImagesFrom();

	# $imagesfrom is either a single prefix or an array of prefixes;
	# a falsy value means no prefix is allowed.
	$prefixMatched = false;
	if ( $imagesfrom ) {
		$prefixes = is_array( $imagesfrom ) ? $imagesfrom : [ $imagesfrom ];
		foreach ( $prefixes as $prefix ) {
			if ( strpos( $url, $prefix ) === 0 ) {
				$prefixMatched = true;
				break;
			}
		}
	}

	$html = false;
	if ( ( $this->mOptions->getAllowExternalImages() || $prefixMatched )
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		# Image found
		$html = Linker::makeExternalImage( $url );
	}

	if ( !$html && $this->mOptions->getEnableImageWhitelist()
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		$whitelist = explode(
			"\n",
			wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
		);

		foreach ( $whitelist as $entry ) {
			# Ignore comments and blank entries
			if ( strpos( $entry, '#' ) === 0 || $entry === '' ) {
				continue;
			}
			# Each entry is a regex fragment, matched case-insensitively
			// @phan-suppress-next-line SecurityCheck-ReDoS preg_quote is not wanted here
			if ( preg_match( '/' . str_replace( '/', '\\/', $entry ) . '/i', $url ) ) {
				# Image matches a whitelist entry
				$html = Linker::makeExternalImage( $url );
				break;
			}
		}
	}

	return $html;
}
2453
/**
 * Process [[ ]] wikilinks in $text and merge the link holders produced
 * by handleInternalLinks2() into this parser's holder array.
 *
 * @param string $text
 * @return string The text as rewritten in place by handleInternalLinks2()
 */
private function handleInternalLinks( $text ) {
	$linkHolders = $this->mLinkHolders;
	// handleInternalLinks2() takes $text by reference and rewrites it
	$linkHolders->merge( $this->handleInternalLinks2( $text ) );

	return $text;
}
2465
2471 private function handleInternalLinks2( &$s ) {
2472 static $tc = false, $e1, $e1_img;
2473 # the % is needed to support urlencoded titles as well
2474 if ( !$tc ) {
2475 $tc = Title::legalChars() . '#%';
2476 # Match a link having the form [[namespace:link|alternate]]trail
2477 $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
2478 # Match cases where there is no "]]", which might still be images
2479 $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
2480 }
2481
2482 $holders = new LinkHolderArray(
2483 $this,
2484 $this->getContentLanguageConverter(),
2485 $this->getHookContainer() );
2486
2487 # split the entire text string on occurrences of [[
2488 $a = StringUtils::explode( '[[', ' ' . $s );
2489 # get the first element (all text up to first [[), and remove the space we added
2490 $s = $a->current();
2491 $a->next();
2492 $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
2493 $s = substr( $s, 1 );
2494
2495 $nottalk = !$this->getTitle()->isTalkPage();
2496
2497 $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
2498 $e2 = null;
2499 if ( $useLinkPrefixExtension ) {
2500 # Match the end of a line for a word that's not followed by whitespace,
2501 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
2502 $charset = $this->contLang->linkPrefixCharset();
2503 $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
2504 $m = [];
2505 if ( preg_match( $e2, $s, $m ) ) {
2506 $first_prefix = $m[2];
2507 } else {
2508 $first_prefix = false;
2509 }
2510 $prefix = false;
2511 } else {
2512 $first_prefix = false;
2513 $prefix = '';
2514 }
2515
2516 # Some namespaces don't allow subpages
2517 $useSubpages = $this->nsInfo->hasSubpages(
2518 $this->getTitle()->getNamespace()
2519 );
2520
2521 # Loop for each link
2522 for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
2523 # Check for excessive memory usage
2524 if ( $holders->isBig() ) {
2525 # Too big
2526 # Do the existence check, replace the link holders and clear the array
2527 $holders->replace( $s );
2528 $holders->clear();
2529 }
2530
2531 if ( $useLinkPrefixExtension ) {
2532 // @phan-suppress-next-line PhanTypeMismatchArgumentNullableInternal $e2 is set under this condition
2533 if ( preg_match( $e2, $s, $m ) ) {
2534 [ , $s, $prefix ] = $m;
2535 } else {
2536 $prefix = '';
2537 }
2538 # first link
2539 if ( $first_prefix ) {
2540 $prefix = $first_prefix;
2541 $first_prefix = false;
2542 }
2543 }
2544
2545 $might_be_img = false;
2546
2547 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
2548 $text = $m[2];
2549 # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
2550 # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
2551 # the real problem is with the $e1 regex
2552 # See T1500.
2553 # Still some problems for cases where the ] is meant to be outside punctuation,
2554 # and no image is in sight. See T4095.
2555 if ( $text !== ''
2556 && substr( $m[3], 0, 1 ) === ']'
2557 && strpos( $text, '[' ) !== false
2558 ) {
2559 $text .= ']'; # so that handleExternalLinks($text) works later
2560 $m[3] = substr( $m[3], 1 );
2561 }
2562 # fix up urlencoded title texts
2563 if ( strpos( $m[1], '%' ) !== false ) {
2564 # Should anchors '#' also be rejected?
2565 $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2566 }
2567 $trail = $m[3];
2568 } elseif ( preg_match( $e1_img, $line, $m ) ) {
2569 # Invalid, but might be an image with a link in its caption
2570 $might_be_img = true;
2571 $text = $m[2];
2572 if ( strpos( $m[1], '%' ) !== false ) {
2573 $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2574 }
2575 $trail = "";
2576 } else { # Invalid form; output directly
2577 $s .= $prefix . '[[' . $line;
2578 continue;
2579 }
2580
2581 // @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset preg_match success when reached here
2582 $origLink = ltrim( $m[1], ' ' );
2583
2584 # Don't allow internal links to pages containing
2585 # PROTO: where PROTO is a valid URL protocol; these
2586 # should be external links.
2587 if ( preg_match( '/^(?i:' . $this->urlUtils->validProtocols() . ')/', $origLink ) ) {
2588 $s .= $prefix . '[[' . $line;
2589 continue;
2590 }
2591
2592 # Make subpage if necessary
2593 if ( $useSubpages ) {
2594 $link = Linker::normalizeSubpageLink(
2595 $this->getTitle(), $origLink, $text
2596 );
2597 } else {
2598 $link = $origLink;
2599 }
2600
2601 // \x7f isn't a default legal title char, so most likely strip
2602 // markers will force us into the "invalid form" path above. But,
2603 // just in case, let's assert that xmlish tags aren't valid in
2604 // the title position.
2605 $unstrip = $this->mStripState->killMarkers( $link );
2606 $noMarkers = ( $unstrip === $link );
2607
2608 $nt = $noMarkers ? Title::newFromText( $link ) : null;
2609 if ( $nt === null ) {
2610 $s .= $prefix . '[[' . $line;
2611 continue;
2612 }
2613
2614 $ns = $nt->getNamespace();
2615 $iw = $nt->getInterwiki();
2616
2617 $noforce = ( substr( $origLink, 0, 1 ) !== ':' );
2618
2619 if ( $might_be_img ) { # if this is actually an invalid link
2620 if ( $ns === NS_FILE && $noforce ) { # but might be an image
2621 $found = false;
2622 while ( true ) {
2623 # look at the next 'line' to see if we can close it there
2624 $a->next();
2625 $next_line = $a->current();
2626 if ( $next_line === false || $next_line === null ) {
2627 break;
2628 }
2629 $m = explode( ']]', $next_line, 3 );
2630 if ( count( $m ) == 3 ) {
2631 # the first ]] closes the inner link, the second the image
2632 $found = true;
2633 $text .= "[[{$m[0]}]]{$m[1]}";
2634 $trail = $m[2];
2635 break;
2636 } elseif ( count( $m ) == 2 ) {
2637 # if there's exactly one ]] that's fine, we'll keep looking
2638 $text .= "[[{$m[0]}]]{$m[1]}";
2639 } else {
2640 # if $next_line is invalid too, we need look no further
2641 $text .= '[[' . $next_line;
2642 break;
2643 }
2644 }
2645 if ( !$found ) {
2646 # we couldn't find the end of this imageLink, so output it raw
2647 # but don't ignore what might be perfectly normal links in the text we've examined
2648 $holders->merge( $this->handleInternalLinks2( $text ) );
2649 $s .= "{$prefix}[[$link|$text";
2650 # note: no $trail, because without an end, there *is* no trail
2651 continue;
2652 }
2653 } else { # it's not an image, so output it raw
2654 $s .= "{$prefix}[[$link|$text";
2655 # note: no $trail, because without an end, there *is* no trail
2656 continue;
2657 }
2658 }
2659
2660 $wasblank = ( $text == '' );
2661 if ( $wasblank ) {
2662 $text = $link;
2663 if ( !$noforce ) {
2664 # Strip off leading ':'
2665 $text = substr( $text, 1 );
2666 }
2667 } else {
2668 # T6598 madness. Handle the quotes only if they come from the alternate part
2669 # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
2670 # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
2671 # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
2672 $text = $this->doQuotes( $text );
2673 }
2674
2675 # Link not escaped by : , create the various objects
2676 if ( $noforce && !$nt->wasLocalInterwiki() ) {
2677 # Interwikis
2678 if (
2679 $iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
2680 $this->languageNameUtils->getLanguageName(
2681 $iw,
2682 LanguageNameUtils::AUTONYMS,
2683 LanguageNameUtils::DEFINED
2684 )
2685 || in_array( $iw, $this->svcOptions->get( MainConfigNames::ExtraInterlanguageLinkPrefixes ) )
2686 )
2687 ) {
2688 # T26502: duplicates are resolved in ParserOutput
2689 $this->mOutput->addLanguageLink( $nt );
2690
2695 $s = preg_replace( '/\n\s*$/', '', $s . $prefix ) . $trail;
2696 continue;
2697 }
2698
2699 if ( $ns === NS_FILE ) {
2700 if ( $wasblank ) {
2701 # if no parameters were passed, $text
2702 # becomes something like "File:Foo.png",
2703 # which we don't want to pass on to the
2704 # image generator
2705 $text = '';
2706 } else {
2707 # recursively parse links inside the image caption
2708 # actually, this will parse them in any other parameters, too,
2709 # but it might be hard to fix that, and it doesn't matter ATM
2710 $text = $this->handleExternalLinks( $text );
2711 $holders->merge( $this->handleInternalLinks2( $text ) );
2712 }
2713 # cloak any absolute URLs inside the image markup, so handleExternalLinks() won't touch them
2714 $s .= $prefix . $this->armorLinks(
2715 $this->makeImage( $nt, $text, $holders ) ) . $trail;
2716 continue;
2717 } elseif ( $ns === NS_CATEGORY ) {
2718 # Strip newlines from the left hand context of Category
2719 # links.
2720 # See T2087, T87753, T174639, T359886
2721 $s = preg_replace( '/\n\s*$/', '', $s . $prefix ) . $trail;
2722
2723 $sortkey = ''; // filled in by CategoryLinksTable
2724 if ( !$wasblank ) {
2725 $sortkey = $text;
2726 }
2727 $this->mOutput->addCategory( $nt, $sortkey );
2728
2729 continue;
2730 }
2731 }
2732
2733 # Self-link checking. For some languages, variants of the title are checked in
2734 # LinkHolderArray::doVariants() to allow batching the existence checks necessary
2735 # for linking to a different variant.
2736 if ( $ns !== NS_SPECIAL && $nt->equals( $this->getTitle() ) ) {
2737 $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail, '',
2738 Sanitizer::escapeIdForLink( $nt->getFragment() ) );
2739 continue;
2740 }
2741
2742 # NS_MEDIA is a pseudo-namespace for linking directly to a file
2743 # @todo FIXME: Should do batch file existence checks, see comment below
2744 if ( $ns === NS_MEDIA ) {
2745 # Give extensions a chance to select the file revision for us
2746 $options = [];
2747 $descQuery = false;
2748 $this->hookRunner->onBeforeParserFetchFileAndTitle(
2749 // @phan-suppress-next-line PhanTypeMismatchArgument Type mismatch on pass-by-ref args
2750 $this, $nt, $options, $descQuery
2751 );
2752 # Fetch and register the file (file title may be different via hooks)
2753 [ $file, $nt ] = $this->fetchFileAndTitle( $nt, $options );
2754 # Cloak with NOPARSE to avoid replacement in handleExternalLinks
2755 $s .= $prefix . $this->armorLinks(
2756 Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
2757 continue;
2758 }
2759
2760 # Some titles, such as valid special pages or files in foreign repos, should
2761 # be shown as bluelinks even though they're not included in the page table
2762 # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
2763 # batch file existence checks for NS_FILE and NS_MEDIA
2764 if ( $iw == '' && $nt->isAlwaysKnown() ) {
2765 $this->mOutput->addLink( $nt );
2766 $s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
2767 } else {
2768 # Links will be added to the output link list after checking
2769 $s .= $holders->makeHolder( $nt, $text, $trail, $prefix );
2770 }
2771 }
2772 return $holders;
2773 }
2774
/**
 * Render a link to a target that is treated as known, and armour the
 * result so that later external-link processing leaves it alone.
 *
 * The trail is split with Linker::splitTrail(): the first piece becomes
 * part of the link text, the second piece is appended after the link.
 *
 * @param LinkTarget $nt Target of the link
 * @param string $text Link text, used as-is inside an HtmlArmor; when it is
 *   empty the HTML-escaped prefixed title text of $nt is used instead
 * @param string $trail Text following the link
 * @param string $prefix Text placed inside the link, in front of $text
 * @return string The armoured link followed by the unused part of the trail
 */
private function makeKnownLinkHolder( LinkTarget $nt, $text = '', $trail = '', $prefix = '' ) {
	[ $linkedTrail, $remainingTrail ] = Linker::splitTrail( $trail );

	// Fall back to the (escaped) page name when no label was supplied
	$label = ( $text == '' )
		? htmlspecialchars( $this->titleFormatter->getPrefixedText( $nt ) )
		: $text;

	$html = $this->getLinkRenderer()->makeKnownLink(
		$nt,
		new HtmlArmor( "$prefix$label$linkedTrail" )
	);

	return $this->armorLinks( $html ) . $remainingTrail;
}
2801
/**
 * Insert a NOPARSE marker in front of every URL protocol found in $text.
 *
 * Each (case-insensitive) match of one of the valid protocols at a word
 * boundary is prefixed with self::MARKER_PREFIX . "NOPARSE".
 *
 * @param string $text
 * @return string
 */
private function armorLinks( $text ) {
	$protocolPattern = '/\b((?i)' . $this->urlUtils->validProtocols() . ')/';
	$armoured = self::MARKER_PREFIX . "NOPARSE$1";

	return preg_replace( $protocolPattern, $armoured, $text );
}
2816
/**
 * Deprecated pass-through to BlockLevelPass::doBlockLevels().
 *
 * @deprecated since 1.35; a deprecation warning is emitted on every call
 * @param string $text
 * @param bool $linestart
 * @return string Whatever BlockLevelPass::doBlockLevels() returns
 */
public function doBlockLevels( $text, $linestart ) {
	wfDeprecated( __METHOD__, '1.35' );

	$processed = BlockLevelPass::doBlockLevels( $text, $linestart );
	return $processed;
}
2830
/**
 * Return the value of a magic variable, computing and caching it in
 * $this->mVarCache on first use.
 *
 * Core variables are resolved by CoreMagicVariables::expand(); when that
 * returns null the ParserGetVariableValueSwitch hook is asked instead.
 *
 * @param string $index Magic variable identifier
 * @param PPFrame|false $frame Frame handed to the ParserGetVariableValueSwitch hook
 * @return string|null The value; null only if neither core nor the hook produced one
 */
private function expandMagicVariable( $index, $frame = false ) {
	// A cached non-null value short-circuits everything below
	$cached = $this->mVarCache[$index] ?? null;
	if ( $cached !== null ) {
		return $cached;
	}

	$ts = new MWTimestamp( $this->mOptions->getTimestamp() /* TS_MW */ );
	if ( $this->hookContainer->isRegistered( 'ParserGetVariableValueTs' ) ) {
		// Let the hook adjust the timestamp (passed as a UNIX timestamp)
		$unixTime = $ts->getTimestamp( TS_UNIX );
		$this->hookRunner->onParserGetVariableValueTs( $this, $unixTime );
		$ts = new MWTimestamp( $unixTime );
	}

	$value = CoreMagicVariables::expand(
		$this, $index, $ts, $this->svcOptions, $this->logger
	);

	if ( $value === null ) {
		// Not a defined core magic word.
		// The hook gets a throwaway array rather than unrestricted
		// access to mVarCache.
		$fakeCache = [];
		$this->hookRunner->onParserGetVariableValueSwitch(
			// @phan-suppress-next-line PhanTypeMismatchArgument $value is passed as null but returned as string
			$this, $fakeCache, $index, $value, $frame
		);
		// The value returned by the hook is cached by falling through.
		// Assert that the hook returned a non-null value for this MV
		'@phan-var string $value';
	}

	$this->mVarCache[$index] = $value;

	return $value;
}
2876
/**
 * Populate $this->mVariables and $this->mSubstWords from the magic word factory.
 */
private function initializeVariables() {
	$factory = $this->magicWordFactory;

	$this->mVariables = $factory->newArray( $factory->getVariableIDs() );
	$this->mSubstWords = $factory->getSubstArray();
}
2887
/**
 * Run the preprocessor over $text and return the resulting tree.
 *
 * @param string $text Wikitext to preprocess
 * @param int $flags Bit field of Preprocessor flags (e.g. self::PTD_FOR_INCLUSION)
 * @return PPNode Result of Preprocessor::preprocessToObj()
 */
public function preprocessToDom( $text, $flags = 0 ) {
	$preprocessor = $this->getPreprocessor();

	return $preprocessor->preprocessToObj( $text, $flags );
}
2909
/**
 * Preprocess $text and expand it in $frame, replacing magic variables,
 * templates and template arguments with their values.
 *
 * Empty text, and text longer than ParserOptions::getMaxIncludeSize(), is
 * returned unchanged.
 *
 * @param string $text Wikitext to expand
 * @param PPFrame|array|false $frame Frame to expand in. False creates a new
 *   top-level frame. Anything that is not a PPFrame is deprecated (1.43) and is
 *   turned into a custom frame via Preprocessor::newCustomFrame().
 * @param bool $argsOnly If true, expand with PPFrame::NO_TEMPLATES
 * @param array $options Optional settings read by this method:
 *   - 'parsoidTopLevelCall' (bool, default false): preprocess with
 *     Preprocessor::START_IN_SOL_STATE
 *   - 'processNowiki' (bool, default false): expand with PPFrame::PROCESS_NOWIKI
 *   - 'stripExtTags' (bool, default true): value of $this->mStripExtTags for
 *     the duration of the expansion
 * @return string
 */
public function replaceVariables(
	$text, $frame = false, $argsOnly = false, array $options = []
) {
	# Is there any text? Also, Prevent too big inclusions!
	$textSize = strlen( $text );
	if ( $textSize < 1 || $textSize > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	if ( $frame === false ) {
		$frame = $this->getPreprocessor()->newFrame();
	} elseif ( !( $frame instanceof PPFrame ) ) {
		// NOTE(review): the head of this call was absent from the reviewed
		// listing; wfDeprecated() is assumed - confirm against upstream.
		wfDeprecated(
			__METHOD__ . " called using plain parameters instead of " .
			"a PPFrame instance. Creating custom frame.",
			'1.43'
		);
		$frame = $this->getPreprocessor()->newCustomFrame( $frame );
	}

	$ppFlags = 0;
	if ( $options['parsoidTopLevelCall'] ?? false ) {
		$ppFlags |= Preprocessor::START_IN_SOL_STATE;
	}
	$dom = $this->preprocessToDom( $text, $ppFlags );

	$flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;
	if ( $options['processNowiki'] ?? false ) {
		$flags |= PPFrame::PROCESS_NOWIKI;
	}

	// Temporarily override mStripExtTags for this expansion only. The
	// previous value is put back even if expand() throws, so a failed
	// expansion cannot leave the parser in the overridden state.
	$savedStripExtTags = $this->mStripExtTags;
	$this->mStripExtTags = $options['stripExtTags'] ?? true;
	try {
		$text = $frame->expand( $dom, $flags );
	} finally {
		$this->mStripExtTags = $savedStripExtTags;
	}

	return $text;
}
2975
/**
 * Record that a parser limitation was hit: add a warning message and a
 * tracking category to the output.
 *
 * The message key is "$limitationType-warning" and the tracking category
 * key is "$limitationType-category".
 *
 * @param string $limitationType Base name of the limitation
 * @param string|int|null $current Current value, passed as a numeric message parameter
 * @param string|int|null $max Maximum allowed value, passed as a numeric message parameter
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	$warningKey = "$limitationType-warning";
	$categoryKey = "$limitationType-category";

	# Passing $current and $max does no harm when the message does not use them.
	# ->inLanguage( $this->mOptions->getUserLangObj() ) is deliberately not used:
	# the warning is shown only during preview, and it would split the parser
	# cache unnecessarily.
	$this->mOutput->addWarningMsg(
		$warningKey,
		Message::numParam( $current ),
		Message::numParam( $max )
	);
	$this->addTrackingCategory( $categoryKey );
}
3014
/**
 * Expand a single double-brace construct: a magic variable, a parser
 * function call, or a template / special page / interwiki transclusion.
 *
 * The construct is tried, in order, as: a subst:/safesubst: literal, a
 * magic variable, a parser function, and finally a page to transclude.
 * If nothing matches, the original wikitext is reconstructed and returned.
 *
 * @param array $piece The parts of the construct. This method reads:
 *   - 'title': node for the part before the first pipe
 *   - 'parts': node list of the remaining arguments
 *   - 'lineStart': whether the construct is at the start of a line
 * @param PPFrame $frame The frame in which the construct is expanded
 * @return array Either [ 'object' => $node ] (unexpanded or local DOM that
 *   the caller still has to expand) or [ 'text' => $string ]
 */
public function braceSubstitution( array $piece, PPFrame $frame ) {
	// Flags

	// $text has been filled
	$found = false;
	$text = '';
	// wiki markup in $text should be escaped
	$nowiki = false;
	// $text is HTML, armour it against most wikitext transformation
	// (it still participates in doBlockLevels, language conversion,
	// and the other steps at the start of ::internalParseHalfParsed)
	$isHTML = false;
	// $text is raw HTML, armour it against all wikitext transformation
	$isRawHTML = false;
	// Force interwiki transclusion to be done in raw mode not rendered
	$forceRawInterwiki = false;
	// $text is a DOM node needing expansion in a child frame
	$isChildObj = false;
	// $text is a DOM node needing expansion in the current frame
	$isLocalObj = false;

	# Title object, where $text came from
	$title = false;

	# $part1 is the bit before the first |, and must contain only title characters.
	# Various prefixes will be stripped from it later.
	$titleWithSpaces = $frame->expand( $piece['title'] );
	$part1 = trim( $titleWithSpaces );
	$titleText = false;

	# Original title text preserved for various purposes
	$originalTitle = $part1;

	# $args is a list of argument nodes, starting from index 0, not including $part1
	$args = $piece['parts'];

	$profileSection = null; // profile templates

	$sawDeprecatedTemplateEquals = false; // T91154

	$isParsoid = $this->mOptions->getUseParsoid();

	# SUBST
	// @phan-suppress-next-line PhanImpossibleCondition
	if ( !$found ) {
		$substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );
		$part1 = trim( $part1 );

		# Possibilities for substMatch: "subst", "safesubst" or FALSE
		# Decide whether to expand template or keep wikitext as-is.
		if ( $this->ot['wiki'] ) {
			# In PST: literal with no prefix, expand with subst: or safesubst:
			$literal = ( $substMatch === false );
		} else {
			# Not in PST: literal with plain subst:, expand with safesubst: or no prefix
			$literal = ( $substMatch == 'subst' );
		}
		if ( $literal ) {
			$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
			$isLocalObj = true;
			$found = true;
		}
	}

	# Variables
	if ( !$found && $args->getLength() == 0 ) {
		$id = $this->mVariables->matchStartToEnd( $part1 );
		if ( $id !== false ) {
			if ( strpos( $part1, ':' ) !== false ) {
				wfDeprecatedMsg(
					'Registering a magic variable with a name including a colon',
					'1.39', false, false
				);
			}
			$text = $this->expandMagicVariable( $id, $frame );
			$found = true;
		}
	}

	# MSG, MSGNW and RAW
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw = $this->magicWordFactory->get( 'msgnw' );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg = $this->magicWordFactory->get( 'msg' );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check for RAW:
		$mwRaw = $this->magicWordFactory->get( 'raw' );
		if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
			$forceRawInterwiki = true;
		}
	}

	# Parser functions
	if ( !$found ) {
		$colonPos = strpos( $part1, ':' );
		if ( $colonPos !== false ) {
			$func = substr( $part1, 0, $colonPos );
			# First argument is the text after the colon; the rest are the
			# unexpanded argument nodes
			$funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
			$argsLength = $args->getLength();
			for ( $i = 0; $i < $argsLength; $i++ ) {
				$funcArgs[] = $args->item( $i );
			}

			$result = $this->callParserFunction(
				$frame, $func, $funcArgs, $isParsoid && $piece['lineStart']
			);

			// Extract any forwarded flags
			if ( isset( $result['title'] ) ) {
				$title = $result['title'];
			}
			if ( isset( $result['found'] ) ) {
				$found = $result['found'];
			}
			if ( array_key_exists( 'text', $result ) ) {
				// a string or null
				$text = $result['text'];
			}
			if ( isset( $result['nowiki'] ) ) {
				$nowiki = $result['nowiki'];
			}
			if ( isset( $result['isHTML'] ) ) {
				$isHTML = $result['isHTML'];
			}
			if ( isset( $result['isRawHTML'] ) ) {
				$isRawHTML = $result['isRawHTML'];
			}
			if ( isset( $result['forceRawInterwiki'] ) ) {
				$forceRawInterwiki = $result['forceRawInterwiki'];
			}
			if ( isset( $result['isChildObj'] ) ) {
				$isChildObj = $result['isChildObj'];
			}
			if ( isset( $result['isLocalObj'] ) ) {
				$isLocalObj = $result['isLocalObj'];
			}
		}
	}

	# Finish mangling title and then check for loops.
	# Set $title to a Title object and $titleText to the PDBK
	if ( !$found ) {
		$ns = NS_TEMPLATE;
		# Split the title into page and subpage
		$subpage = '';
		$relative = Linker::normalizeSubpageLink(
			$this->getTitle(), $part1, $subpage
		);
		if ( $part1 !== $relative ) {
			$part1 = $relative;
			$ns = $this->getTitle()->getNamespace();
		}
		$title = Title::newFromText( $part1, $ns );
		if ( $title ) {
			$titleText = $title->getPrefixedText();
			# Check for language variants if the template is not found
			if ( $this->getTargetLanguageConverter()->hasVariants() && $title->getArticleID() == 0 ) {
				$this->getTargetLanguageConverter()->findVariantLink( $part1, $title, true );
			}
			# Do recursion depth check
			$limit = $this->mOptions->getMaxTemplateDepth();
			if ( $frame->depth >= $limit ) {
				$found = true;
				$text = '<span class="error">'
					. wfMessage( 'parser-template-recursion-depth-warning' )
						->numParams( $limit )->inContentLanguage()->text()
					. '</span>';
			}
		}
	}

	# Load from database
	if ( !$found && $title ) {
		$profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
		if ( !$title->isExternal() ) {
			if ( $title->isSpecialPage()
				&& $this->mOptions->getAllowSpecialInclusion()
				&& ( $this->ot['html'] ||
					// PFragment for Parsoid
					( !$this->mStripExtTags && $this->ot['pre'] ) )
			) {
				$specialPage = $this->specialPageFactory->getPage( $title->getDBkey() );
				// Pass the template arguments as URL parameters.
				// "uselang" will have no effect since the Language object
				// is forced to the one defined in ParserOptions.
				$pageArgs = [];
				$argsLength = $args->getLength();
				for ( $i = 0; $i < $argsLength; $i++ ) {
					$bits = $args->item( $i )->splitArg();
					// Only named arguments are forwarded
					if ( strval( $bits['index'] ) === '' ) {
						$name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
						$value = trim( $frame->expand( $bits['value'] ) );
						$pageArgs[$name] = $value;
					}
				}

				// Create a new context to execute the special page, that is expensive
				if ( $this->incrementExpensiveFunctionCount() ) {
					$context = new RequestContext;
					$context->setTitle( $title );
					$context->setRequest( new FauxRequest( $pageArgs ) );
					if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
						$context->setUser( $this->userFactory->newFromUserIdentity( $this->getUserIdentity() ) );
					} else {
						// If this page is cached, then we better not be per user.
						$context->setUser( User::newFromName( '127.0.0.1', false ) );
					}
					$context->setLanguage( $this->mOptions->getUserLangObj() );
					$ret = $this->specialPageFactory->capturePath( $title, $context, $this->getLinkRenderer() );
					if ( $ret ) {
						$text = $context->getOutput()->getHTML();
						$this->mOutput->addOutputPageMetadata( $context->getOutput() );
						$found = true;
						$isHTML = true;
						if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
							$this->mOutput->updateRuntimeAdaptiveExpiry(
								$specialPage->maxIncludeCacheTime()
							);
						}
					}
				}
			} elseif ( $this->nsInfo->isNonincludable( $title->getNamespace() ) ) {
				$found = false; # access denied
				$this->logger->debug(
					__METHOD__ .
					": template inclusion denied for " . $title->getPrefixedDBkey()
				);
			} else {
				[ $text, $title ] = $this->getTemplateDom( $title, $isParsoid && $piece['lineStart'] );
				if ( $text !== false ) {
					$found = true;
					$isChildObj = true;
					if (
						$title->getNamespace() === NS_TEMPLATE &&
						$title->getDBkey() === '=' &&
						$originalTitle === '='
					) {
						// Note that we won't get here if `=` is evaluated
						// (in the future) as a parser function, nor if
						// the Template namespace is given explicitly,
						// ie `{{Template:=}}`. Only `{{=}}` triggers.
						$sawDeprecatedTemplateEquals = true; // T91154
					}
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
				$text = "[[:$titleText]]";
				$found = true;
			}
		} elseif ( $title->isTrans() ) {
			# Interwiki transclusion
			if ( $this->ot['html'] && !$forceRawInterwiki ) {
				$text = $this->interwikiTransclude( $title, 'render' );
				$isHTML = true;
			} else {
				$text = $this->interwikiTransclude( $title, 'raw' );
				# Preprocess it like a template
				$sol = ( $isParsoid && $piece['lineStart'] ) ? Preprocessor::START_IN_SOL_STATE : 0;
				$text = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION | $sol );
				$isChildObj = true;
			}
			$found = true;
		}

		# Do infinite loop check
		# This has to be done after redirect resolution to avoid infinite loops via redirects
		if ( !$frame->loopCheck( $title ) ) {
			$found = true;
			$text = '<span class="error">'
				. wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
				. '</span>';
			$this->addTrackingCategory( 'template-loop-category' );
			$this->mOutput->addWarningMsg(
				'template-loop-warning',
				Message::plaintextParam( $titleText )
			);
			$this->logger->debug( __METHOD__ . ": template loop broken at '$titleText'" );
		}
	}

	# If we haven't found text to substitute by now, we're done
	# Recover the source wikitext and return it
	if ( !$found ) {
		$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
		if ( $profileSection ) {
			$this->mProfiler->scopedProfileOut( $profileSection );
		}
		return [ 'object' => $text ];
	}

	# Expand DOM-style return values in a child frame
	if ( $isChildObj ) {
		# Clean up argument array
		$newFrame = $frame->newChild( $args, $title );

		if ( $nowiki ) {
			$text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
		} elseif ( $titleText !== false && $newFrame->isEmpty() ) {
			# Expansion is eligible for the empty-frame cache
			$text = $newFrame->cachedExpand( $titleText, $text );
		} else {
			# Uncached expansion
			$text = $newFrame->expand( $text );
		}
	}
	if ( $isLocalObj && $nowiki ) {
		$text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
		$isLocalObj = false;
	}

	if ( $profileSection ) {
		$this->mProfiler->scopedProfileOut( $profileSection );
	}
	if (
		$sawDeprecatedTemplateEquals &&
		$this->mStripState->unstripBoth( $text ) !== '='
	) {
		// T91154: {{=}} is deprecated when it doesn't expand to `=`;
		// use {{Template:=}} if you must.
		$this->addTrackingCategory( 'template-equals-category' );
		$this->mOutput->addWarningMsg( 'template-equals-warning' );
	}

	# Replace raw HTML by a placeholder
	if ( $isHTML ) {
		// @phan-suppress-next-line SecurityCheck-XSS
		$text = $this->insertStripItem( $text );
	} elseif ( $isRawHTML ) {
		$marker = self::MARKER_PREFIX . "-pf-"
			. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;
		// use 'nowiki' type to protect this from doBlockLevels,
		// language conversion, etc.
		// @phan-suppress-next-line SecurityCheck-XSS
		$this->mStripState->addNoWiki( $marker, $text );
		$text = $marker;
	} elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
		# Escape nowiki-style return values
		// @phan-suppress-next-line SecurityCheck-DoubleEscaped
		$text = wfEscapeWikiText( $text );
	} elseif ( is_string( $text )
		&& !$piece['lineStart']
		&& preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
	) {
		// T2529: if the template begins with a table or block-level
		// element, it should be treated as beginning a new line.
		// This behavior is somewhat controversial.
		//
		// T382464: Parsoid sets $piece['lineStart'] at top-level when
		// expanding templates, so this hack is restricted to nested expansions.
		$text = "\n" . $text;
	}

	if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
		# Error, oversize inclusion
		if ( $titleText !== false ) {
			# Make a working, properly escaped link if possible (T25588)
			$text = "[[:$titleText]]";
		} else {
			# This will probably not be a working link, but at least it may
			# provide some hint of where the problem is
			$originalTitle = preg_replace( '/^:/', '', $originalTitle );
			$text = "[[:$originalTitle]]";
		}
		$text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
			. 'post-expand include size too large -->' );
		$this->limitationWarn( 'post-expand-template-inclusion' );
	}

	if ( $isLocalObj ) {
		$ret = [ 'object' => $text ];
	} else {
		$ret = [ 'text' => $text ];
	}

	return $ret;
}
3418
/**
 * Look up a parser function by name, call its hook, and normalise the result.
 *
 * The name is first matched against the case-sensitive synonyms, then
 * (lower-cased with the content language) against the case-insensitive ones.
 *
 * @param PPFrame $frame Frame used to expand node arguments
 * @param string $function Function name as written in the wikitext
 * @param array $args Arguments: PPNode objects and/or plain values
 * @param bool $inSolState Whether the call is in start-of-line state; only
 *   used when the hook's output is preprocessed ('noparse' => false)
 * @return array Always contains 'found'. When the function exists it is the
 *   hook's result array (with index 0 moved to 'text'); when the hook
 *   returned 'noparse' => false, 'text' is a preprocessed DOM and
 *   'isChildObj' is true.
 */
public function callParserFunction( PPFrame $frame, $function, array $args = [], bool $inSolState = false ) {
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		# Case sensitive functions
		$function = $this->mFunctionSynonyms[1][$function];
	} else {
		# Case insensitive functions
		$function = $this->contLang->lc( $function );
		if ( !isset( $this->mFunctionSynonyms[0][$function] ) ) {
			return [ 'found' => false ];
		}
		$function = $this->mFunctionSynonyms[0][$function];
	}

	[ $callback, $flags ] = $this->mFunctionHooks[$function];

	# The parser itself is always the first hook argument
	$allArgs = [ $this ];
	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Object-style hook: hand over the frame plus an array of PPNodes.
		# The first argument (key 0) is passed through untouched.
		$nodeArgs = [];
		foreach ( $args as $key => $arg ) {
			if ( $arg instanceof PPNode || $key === 0 ) {
				$nodeArgs[] = $arg;
			} else {
				$nodeArgs[] = $this->mPreprocessor->newPartNodeArray( [ $key => $arg ] )->item( 0 );
			}
		}

		$allArgs[] = $frame;
		$allArgs[] = $nodeArgs;
	} else {
		# Plain hook: every argument becomes a trimmed string
		foreach ( $args as $key => $arg ) {
			if ( $arg instanceof PPNode ) {
				$allArgs[] = trim( $frame->expand( $arg ) );
			} elseif ( is_int( $key ) && $key >= 0 ) {
				$allArgs[] = trim( $arg );
			} else {
				$allArgs[] = trim( "$key=$arg" );
			}
		}
	}

	$result = $callback( ...$allArgs );

	# Function hooks may return either a wikitext string or an array holding
	# the string and any flags. Bring both forms into the shape this method
	# is expected to return.
	if ( is_array( $result ) ) {
		if ( !isset( $result['text'] ) && isset( $result[0] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		$result += [
			'found' => true,
		];
	} else {
		$result = [
			'found' => true,
			'text' => $result,
		];
	}

	$noparse = $result['noparse'] ?? true;
	$preprocessFlags = $result['preprocessFlags'] ?? 0;

	if ( !$noparse ) {
		if ( $inSolState ) {
			$preprocessFlags |= Preprocessor::START_IN_SOL_STATE;
		} else {
			$preprocessFlags |= 0;
		}
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3525
/**
 * Get the preprocessed DOM of a template, using the per-parser caches
 * $this->mTplDomCache and $this->mTplRedirCache.
 *
 * @param LinkTarget $title Template to load
 * @param bool $inSolState Whether to preprocess in start-of-line state;
 *   part of the DOM cache key
 * @return array Two elements: the DOM (or false if no text was found) and
 *   the title the text was actually fetched from
 */
public function getTemplateDom( LinkTarget $title, bool $inSolState = false ) {
	$requestedTitle = $title;
	$titleKey = CacheKeyHelper::getKeyForPage( $title );

	// Follow a previously recorded title change (e.g. redirect) first
	if ( isset( $this->mTplRedirCache[$titleKey] ) ) {
		[ $redirNs, $redirDbKey ] = $this->mTplRedirCache[$titleKey];
		$title = Title::makeTitle( $redirNs, $redirDbKey );
		$titleKey = CacheKeyHelper::getKeyForPage( $title );
	}

	// Factor in sol-state in the cache key.
	// Note: the suffix is "0" when $inSolState is true and "1" otherwise;
	// the two states still map to distinct keys.
	$titleKey = "$titleKey:sol=" . ( $inSolState ? "0" : "1" );
	if ( isset( $this->mTplDomCache[$titleKey] ) ) {
		return [ $this->mTplDomCache[$titleKey], $title ];
	}

	# Cache miss, go to the database
	// FIXME T383919: if $title is changed by this call, caching below
	// will be ineffective.
	[ $wikitext, $title ] = $this->fetchTemplateAndTitle( $title );

	if ( $wikitext === false ) {
		$this->mTplDomCache[$titleKey] = false;
		return [ false, $title ];
	}

	$ppFlags = Preprocessor::DOM_FOR_INCLUSION | ( $inSolState ? Preprocessor::START_IN_SOL_STATE : 0 );
	$dom = $this->preprocessToDom( $wikitext, $ppFlags );
	$this->mTplDomCache[$titleKey] = $dom;

	// Remember where the requested title ended up, for the lookup above
	if ( !$title->isSameLinkAs( $requestedTitle ) ) {
		$requestedKey = CacheKeyHelper::getKeyForPage( $requestedTitle );
		$this->mTplRedirCache[$requestedKey] = [ $title->getNamespace(), $title->getDBkey() ];
	}

	return [ $dom, $title ];
}
3581
/**
 * Fetch the current revision of a page, going through a small per-parser
 * LRU cache (created lazily, 100 entries).
 *
 * On a cache miss the callback from
 * ParserOptions::getCurrentRevisionRecordCallback() is invoked with a Title
 * and this parser; a false result is stored as null.
 *
 * @param LinkTarget $link
 * @return RevisionRecord|null The cached value for this page
 */
public function fetchCurrentRevisionRecordOfTitle( LinkTarget $link ) {
	$cacheKey = CacheKeyHelper::getKeyForPage( $link );
	if ( !$this->currentRevisionCache ) {
		$this->currentRevisionCache = new MapCacheLRU( 100 );
	}
	$cache = $this->currentRevisionCache;

	if ( !$cache->has( $cacheKey ) ) {
		$title = Title::newFromLinkTarget( $link ); // hook signature compat
		// Defaults to Parser::statelessFetchRevisionRecord()
		$fetchRevision = $this->mOptions->getCurrentRevisionRecordCallback();
		$fetched = $fetchRevision( $title, $this );
		// Parser::statelessFetchRevisionRecord() can return false;
		// normalize it to null.
		$cache->set( $cacheKey, $fetched === false ? null : $fetched );
	}

	return $cache->get( $cacheKey );
}
3617
/**
 * Whether the current-revision cache already holds an entry for this page.
 *
 * @param LinkTarget $link
 * @return bool False if the cache has not been created yet or has no entry
 */
public function isCurrentRevisionOfTitleCached( LinkTarget $link ) {
	$key = CacheKeyHelper::getKeyForPage( $link );

	if ( !$this->currentRevisionCache ) {
		return false;
	}

	return (bool)$this->currentRevisionCache->has( $key );
}
3631
/**
 * Fetch the known current revision of a page through the RevisionLookup
 * service, without touching any parser state.
 *
 * @param LinkTarget $link Page to look up; used directly if it is already a
 *   PageIdentity, otherwise converted to a Title
 * @param Parser|null $parser Not used by this implementation
 * @return RevisionRecord|false|null Result of RevisionLookup::getKnownCurrentRevision()
 */
public static function statelessFetchRevisionRecord( LinkTarget $link, $parser = null ) {
	// A PageIdentity (probably a Title) can be used as-is.
	// XXX: use RevisionStore::getPageForLink()!
	// ...but get the info for the current revision at the same time?
	// Should RevisionStore::getKnownCurrentRevision accept a LinkTarget?
	$page = ( $link instanceof PageIdentity )
		? $link
		: Title::newFromLinkTarget( $link );

	return MediaWikiServices::getInstance()
		->getRevisionLookup()
		->getKnownCurrentRevision( $page );
}
3656
/**
 * Fetch the unparsed text of a template through the template callback and
 * register every reported dependency with the parser output.
 *
 * @param LinkTarget $link Template to fetch
 * @return array Two elements: the text (string, or whatever non-string value
 *   the callback supplied, e.g. false) and the final Title
 */
public function fetchTemplateAndTitle( LinkTarget $link ) {
	// Use Title for compatibility with callbacks and return type
	$title = Title::newFromLinkTarget( $link );

	// Defaults to Parser::statelessFetchTemplate()
	$fetchTemplate = $this->mOptions->getTemplateCallback();
	$fetched = $fetchTemplate( $title, $this );
	$revRecord = $fetched['revision-record'] ?? null;

	$text = $fetched['text'];
	if ( is_string( $fetched['text'] ) ) {
		// We use U+007F DELETE to distinguish strip markers from regular text
		$text = strtr( $text, "\x7f", "?" );
	}
	$finalTitle = $fetched['finalTitle'] ?? $title;

	foreach ( ( $fetched['deps'] ?? [] ) as $dep ) {
		$this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );

		if ( !$dep['title']->equals( $this->getTitle() ) || !( $revRecord instanceof RevisionRecord ) ) {
			continue;
		}

		// Self-transclusion; final result may change based on the new page version
		try {
			$sha1 = $revRecord->getSha1();
		} catch ( RevisionAccessException $e ) {
			$sha1 = null;
		}
		$this->setOutputFlag( ParserOutputFlags::VARY_REVISION_SHA1, 'Self transclusion' );
		$this->getOutput()->setRevisionUsedSha1Base36( $sha1 );
	}

	return [ $text, $finalTitle ];
}
3694
/**
 * Fetch the wikitext of a template, following up to two redirects, and
 * collect the pages the result depends on.
 *
 * @param LinkTarget $page Template to fetch
 * @param Parser|false $parser Parser to take the context title and the
 *   revision cache from; when false, revisions are loaded through the
 *   RevisionLookup service
 * @return array With the keys:
 *   - 'revision-record': RevisionRecord of the last page looked at, or false
 *   - 'text': the wikitext, or false if none was found
 *   - 'finalTitle': the Title the text belongs to
 *   - 'deps': list of [ 'title' => , 'page_id' => , 'rev_id' => ] arrays
 */
public static function statelessFetchTemplate( $page, $parser = false ) {
	$title = Title::castFromLinkTarget( $page ); // for compatibility with return type
	$text = false;
	$skip = false;
	$finalTitle = $title;
	$deps = [];
	$revRecord = null;
	$contextTitle = $parser ? $parser->getTitle() : null;

	# Loop to fetch the article, with up to 2 redirects

	# Note that $title (including redirect targets) could be
	# external; we do allow hooks a chance to redirect the
	# external title to a local one (which might be useful), but
	# are careful not to add external titles to the dependency
	# list. (T362221)

	$services = MediaWikiServices::getInstance();
	$revLookup = $services->getRevisionLookup();
	$hookRunner = new HookRunner( $services->getHookContainer() );

	for ( $i = 0; $i < 3 && is_object( $title ); $i++ ) {
		# Give extensions a chance to select the revision instead
		$revRecord = null; # Assume no hook
		$origTitle = $title;
		$titleChanged = false;
		$hookRunner->onBeforeParserFetchTemplateRevisionRecord(
			# The $title is not a PageIdentity, as it may
			# contain fragments or even represent an attempt to transclude
			# a broken or otherwise-missing Title, which the hook may
			# fix up. Similarly, the $contextTitle may represent a special
			# page or other page which "exists" as a parsing context but
			# is not in the DB.
			$contextTitle, $title,
			$skip, $revRecord
		);

		if ( $skip ) {
			# The hook asked for this template to be skipped
			$text = false;
			if ( !$title->isExternal() ) {
				$deps[] = [
					'title' => $title,
					'page_id' => $title->getArticleID(),
					'rev_id' => null
				];
			}
			break;
		}

		# Get the revision
		if ( !$revRecord ) {
			$revRecord = $parser
				? $parser->fetchCurrentRevisionRecordOfTitle( $title )
				: $revLookup->getRevisionByTitle( $title );
		}

		if ( $revRecord ) {
			# Update title, as $revRecord may have been changed by hook
			$title = Title::newFromLinkTarget(
				$revRecord->getPageAsLinkTarget()
			);
			// Assuming title is not external if we've got a $revRecord
			$deps[] = [
				'title' => $title,
				'page_id' => $revRecord->getPageId(),
				'rev_id' => $revRecord->getId(),
			];
		} elseif ( !$title->isExternal() ) {
			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null,
			];
		}

		if ( !$title->equals( $origTitle ) ) {
			# If we fetched a rev from a different title, register
			# the original title too...
			if ( !$origTitle->isExternal() ) {
				$deps[] = [
					'title' => $origTitle,
					'page_id' => $origTitle->getArticleID(),
					'rev_id' => null,
				];
			}
			$titleChanged = true;
		}

		# If there is no current revision, there is no page
		if ( $revRecord === null || $revRecord->getId() === null ) {
			$services->getLinkCache()->addBadLinkObj( $title );
		}

		if ( $revRecord ) {
			if ( $titleChanged && !$revRecord->hasSlot( SlotRecord::MAIN ) ) {
				// We've added this (missing) title to the dependencies;
				// give the hook another chance to redirect it to an
				// actual page.
				$text = false;
				$finalTitle = $title;
				continue;
			}
			if ( $revRecord->hasSlot( SlotRecord::MAIN ) ) { // T276476
				$content = $revRecord->getContent( SlotRecord::MAIN );
				$text = $content ? $content->getWikitextForTransclusion() : null;
			} else {
				$text = false;
			}

			if ( $text === false || $text === null ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() === NS_MEDIAWIKI ) {
			# No revision, but the page may be backed by an interface message
			$message = wfMessage( $services->getContentLanguage()->
				lcfirst( $title->getText() ) )->inContentLanguage();
			if ( !$message->exists() ) {
				$text = false;
				break;
			}
			$text = $message->plain();
			break;
		} else {
			break;
		}

		// @phan-suppress-next-line PhanPossiblyUndeclaredVariable Only reached when content is set
		if ( !$content ) {
			break;
		}

		# Redirect?
		$finalTitle = $title;
		$title = $content->getRedirectTarget();
	}

	return [
		// Previously, when this also returned a Revision object,
		// 'revision-record' was set to false instead of null if it was
		// unavailable, so that callers could use isset and then rely on the
		// revision-record key instead of the revision key, even if there was
		// no corresponding object. False is still used here for backwards
		// compatibility.
		'revision-record' => $revRecord ?: false,
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps
	];
}
3848
/**
 * Fetch a file and the title it was found under, registering the file as an
 * image dependency of the current parser output.
 *
 * The dependency is always recorded for $link. When the fetched file reports
 * a different title (e.g. after a redirect was resolved), the dependency is
 * recorded for that title too, and that title is the one returned.
 *
 * @param LinkTarget $link
 * @param array $options Passed through to fetchFileNoRegister()
 * @return array Two elements: whatever fetchFileNoRegister() returned, and a Title
 */
public function fetchFileAndTitle( LinkTarget $link, array $options = [] ) {
	$file = $this->fetchFileNoRegister( $link, $options );

	if ( $file ) {
		$time = $file->getTimestamp();
		$sha1 = $file->getSha1();
	} else {
		$time = false;
		$sha1 = false;
	}

	# Record the dependency under the name that was asked for...
	$this->mOutput->addImage( $link, $time, $sha1 );

	if ( $file ) {
		$resolved = $file->getTitle();
		if ( !$link->isSameLinkAs( $resolved ) ) {
			# ...and under the name the file actually lives at.
			$link = $resolved;
			$this->mOutput->addImage( $link, $time, $sha1 );
		}
	}

	// Callers expect a Title rather than a bare LinkTarget
	return [ $file, Title::newFromLinkTarget( $link ) ];
}
3873
/**
 * Look up a file without registering it as a dependency of the output.
 *
 * @param LinkTarget $link
 * @param array $options Recognised keys in this method:
 *   - 'broken': if set, no lookup is done and false is returned
 *   - 'sha1': if set, the file is looked up by this key instead of by name
 *   The whole array is also handed to the repo group lookup.
 * @return mixed false when 'broken' is set, otherwise the repo group's lookup result
 */
protected function fetchFileNoRegister( LinkTarget $link, array $options = [] ) {
	if ( isset( $options['broken'] ) ) {
		// A hook forced a broken thumbnail
		return false;
	}

	$repoGroup = MediaWikiServices::getInstance()->getRepoGroup();

	if ( isset( $options['sha1'] ) ) {
		// Lookup by (sha1, timestamp)
		return $repoGroup->findFileFromKey( $options['sha1'], $options );
	}

	// Lookup by (name, timestamp)
	return $repoGroup->findFile( TitleValue::newFromLinkTarget( $link ), $options );
}
3898
/**
 * Transclude a page from another wiki by fetching it over HTTP.
 *
 * The response is cached in the WAN cache under a key derived from the
 * target wiki ID (or 'external') and the SHA-1 of the URL. A localized
 * error message is returned instead of the content when the feature is
 * disabled, the URL is longer than 1024 bytes, or the fetch failed.
 *
 * @param LinkTarget $link
 * @param string $action Value for the 'action' URL parameter
 * @return string Fetched text, or an error message in the content language
 */
public function interwikiTransclude( LinkTarget $link, $action ) {
	if ( !$this->svcOptions->get( MainConfigNames::EnableScaryTranscluding ) ) {
		return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
	}

	// TODO: extract relevant functionality from Title
	$title = Title::newFromLinkTarget( $link );
	$url = $title->getFullURL( [ 'action' => $action ] );
	if ( strlen( $url ) > 1024 ) {
		return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
	}

	// Remote wiki ID, or false when there is none
	$wikiId = $title->getTransWikiID();
	$hasWikiId = ( $wikiId !== false );

	$fname = __METHOD__;
	$cache = $this->wanCache;

	$cacheKey = $cache->makeGlobalKey(
		'interwiki-transclude',
		$hasWikiId ? $wikiId : 'external',
		sha1( $url )
	);
	$checkKeys = $hasWikiId
		? [ $cache->makeGlobalKey( 'interwiki-page', $wikiId, $title->getDBkey() ) ]
		: [];

	// Cache-miss callback: performs the HTTP request and adjusts the TTL
	$fetch = function ( $oldValue, &$ttl ) use ( $url, $fname, $cache ) {
		$req = $this->httpRequestFactory->create( $url, [], $fname );

		$status = $req->execute(); // Status object
		$ok = $status->isOK();
		if ( !$ok ) {
			// Do not cache failures
			$ttl = $cache::TTL_UNCACHEABLE;
		} elseif ( $req->getResponseHeader( 'X-Database-Lagged' ) !== null ) {
			// The remote side reported lag; keep the result only briefly
			$ttl = min( $cache::TTL_LAGGED, $ttl );
		}

		return [
			'text' => $ok ? $req->getContent() : null,
			'code' => $req->getStatus()
		];
	};

	$data = $cache->getWithSetCallback(
		$cacheKey,
		$this->svcOptions->get( MainConfigNames::TranscludeCacheExpiry ),
		$fetch,
		[
			'checkKeys' => $checkKeys,
			'pcGroup' => 'interwiki-transclude:5',
			'pcTTL' => $cache::TTL_PROC_LONG
		]
	);

	if ( is_string( $data['text'] ) ) {
		return $data['text'];
	}

	if ( $data['code'] != 200 ) {
		// Though we failed to fetch the content, this status is useless.
		return wfMessage( 'scarytranscludefailed-httpstatus' )
			->params( $url, $data['code'] )->inContentLanguage()->text();
	}

	return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
}
3969
/**
 * Resolve a triple-brace argument ({{{name|default}}}) against a frame.
 *
 * Resolution order, as implemented below:
 *  1. the argument value from the frame;
 *  2. the supplied default (first part), when the output type allows it;
 *  3. the literal {{{...}}} construct, rebuilt from its pieces.
 *
 * @param array $piece Uses the 'title' and 'parts' entries
 * @param PPFrame $frame
 * @return array Either [ 'object' => ... ] or [ 'text' => ... ]
 */
public function argSubstitution( array $piece, PPFrame $frame ) {
	$parts = $piece['parts'];
	$nameWithSpaces = $frame->expand( $piece['title'] );
	$text = $frame->getArgument( trim( $nameWithSpaces ) );
	$missing = ( $text === false );

	$object = false;
	if ( $missing && $parts->getLength() > 0
		&& ( $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() )
		)
	) {
		# No match in frame, use the supplied default
		$object = $parts->item( 0 )->getChildren();
	}

	# Account for the argument size, even when the default is used instead
	$withinLimit = $this->incrementIncludeSize( 'arg', strlen( $text ) );
	if ( !$withinLimit ) {
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $missing && $object === false ) {
		# No match anywhere
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
	}

	if ( $object !== false ) {
		return [ 'object' => $object ];
	}

	if ( !$withinLimit ) {
		# The warning only ends up in the result when text is returned
		$text .= '<!-- WARNING: argument omitted, expansion size too large -->';
	}

	return [ 'text' => $text ];
}
4015
/**
 * Ask the Parsoid site configuration whether the given tag needs nowiki
 * stripped when used through the tag parser function.
 *
 * @param string $lowerTagName Tag name; the parameter name suggests it is
 *   expected in lower case (not enforced here)
 * @return bool
 */
public function tagNeedsNowikiStrippedInTagPF( string $lowerTagName ): bool {
	return MediaWikiServices::getInstance()
		->getParsoidSiteConfig()
		->tagNeedsNowikiStrippedInTagPF( $lowerTagName );
}
4020
/**
 * Return the text to be used for a given extension tag.
 *
 * In HTML output mode (or for <nowiki> when $processNowiki is set) the
 * registered tag hook is run and its output stored in the strip state under
 * a fresh marker, which is returned. Otherwise the tag is rebuilt as
 * XML-like wikitext and either returned directly or stored under a marker,
 * depending on the marker type chosen below.
 *
 * @param array $params Keys read here:
 *   - 'name': node for the tag name (required)
 *   - 'attr': optional node for the raw attribute text
 *   - 'inner': optional node for the tag contents; absent/null for a
 *     self-closed tag
 *   - 'close': optional node for the closing tag text
 *   - 'attributes': optional array of attributes passed via {{#tag:}}
 * @param PPFrame $frame
 * @param bool $processNowiki Also run the hook for <nowiki> outside HTML mode
 * @return string Marker, raw output, or an expansion error string
 * @throws UnexpectedValueException If a hook returns an unknown marker type
 */
public function extensionSubstitution( array $params, PPFrame $frame, bool $processNowiki = false ) {
	static $errorStr = '<span class="error">';

	$name = $frame->expand( $params['name'] );
	if ( str_starts_with( $name, $errorStr ) ) {
		// Probably expansion depth or node count exceeded. Just punt the
		// error up.
		return $name;
	}

	// Parse attributes from XML-like wikitext syntax
	$attrText = !isset( $params['attr'] ) ? '' : $frame->expand( $params['attr'] );
	if ( str_starts_with( $attrText, $errorStr ) ) {
		// See above
		return $attrText;
	}

	// We can't safely check if the expansion for $content resulted in an
	// error, because the content could happen to be the error string
	// (T149622).
	$content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );

	$marker = self::MARKER_PREFIX . "-$name-"
		. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	$normalizedName = strtolower( $name );
	$isNowiki = $normalizedName === 'nowiki';
	$markerType = $isNowiki ? 'nowiki' : 'general';
	if ( $this->ot['html'] || ( $processNowiki && $isNowiki ) ) {
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		// Merge in attributes passed via {{#tag:}} parser function
		// (array union: attributes parsed from the text win on conflict)
		if ( isset( $params['attributes'] ) ) {
			$attributes += $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$normalizedName] ) ) {
			// Note that $content may be null here, for example if the
			// tag is self-closed.
			$output = $this->mTagHooks[$normalizedName]( $content, $attributes, $this, $frame );
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $normalizedName ) . '</span>';
		}

		if ( is_array( $output ) ) {
			// Extract flags: element 0 is the output, the string keys are options
			$flags = $output;
			$output = $flags[0];
			if ( isset( $flags['isRawHTML'] ) ) {
				$markerType = 'nowiki';
			}
			// An explicit marker type takes precedence over isRawHTML
			if ( isset( $flags['markerType'] ) ) {
				$markerType = $flags['markerType'];
			}
		}
	} else {
		// We're substituting a {{subst:#tag:}} parser function.
		// Convert the attributes it passed into the XML-like string.
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $this->getStripState()->unstripBoth( $attrValue ), ENT_COMPAT ) . '"';
			}
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			// Treat a missing 'close' entry like a null one, the same way
			// 'attr' and 'inner' are handled above, instead of reading an
			// undefined array key.
			$close = !isset( $params['close'] ) ? '' : $frame->expand( $params['close'] );
			if ( str_starts_with( $close, $errorStr ) ) {
				// See above
				return $close;
			}
			$output = "<$name$attrText>$content$close";
		}
		if ( !$this->mStripExtTags ) {
			if ( $this->svcOptions->get( MainConfigNames::ParsoidFragmentSupport ) === 'v2' ) {
				$markerType = 'exttag';
			} else {
				$markerType = 'none';
			}
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	} elseif ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} elseif ( $markerType === 'exttag' ) {
		$this->mStripState->addExtTag( $marker, $output );
	} else {
		throw new UnexpectedValueException( __METHOD__ . ': invalid marker type' );
	}
	return $marker;
}
4136
/**
 * Add to one of the include size counters, unless that would push it past
 * the maximum include size from the parser options.
 *
 * @param string $type Key into $this->mIncludeSizes
 * @param int $size Amount to add
 * @return bool False if the limit would be exceeded (counter left untouched),
 *   true if the counter was increased
 */
private function incrementIncludeSize( $type, $size ) {
	$newSize = $this->mIncludeSizes[$type] + $size;
	if ( $newSize > $this->mOptions->getMaxIncludeSize() ) {
		return false;
	}

	$this->mIncludeSizes[$type] = $newSize;
	return true;
}
4152
4158 $this->mExpensiveFunctionCount++;
4159 return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
4160 }
4161
/**
 * Strip double-underscore magic words (__TOC__, __NOTOC__, ...) from the
 * text and record their effects on the parser state and parser output.
 *
 * @param string $text
 * @return string Text with the magic words removed; the first __TOC__ is
 *   replaced by the TOC placeholder
 */
private function handleDoubleUnderscore( $text ) {
	# __TOC__ goes first, because its position has to be remembered
	$tocWord = $this->magicWordFactory->get( 'toc' );
	if ( $tocWord->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# The first occurrence becomes a placeholder that is filled in with
		# the TOC at the end; every further occurrence is dropped.
		$text = $tocWord->replace( self::TOC_PLACEHOLDER, $text, 1 );
		$text = $tocWord->replace( '', $text );

		# Recorded as a page property like every other double-underscore
		# (see the end of this method)
		$this->mOutput->setUnsortedPageProperty( 'toc' );
	}

	# All remaining double-underscores are matched and removed in one go
	$this->mDoubleUnderscores = $this->magicWordFactory
		->getDoubleUnderscoreArray()
		->matchAndRemove( $text );

	if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
		$this->mOutput->setNoGallery( true );
	}

	# An explicit __TOC__ wins over __NOTOC__
	if ( !$this->mForceTocPosition && isset( $this->mDoubleUnderscores['notoc'] ) ) {
		$this->mShowToc = false;
	}

	if ( isset( $this->mDoubleUnderscores['hiddencat'] )
		&& $this->getTitle()->getNamespace() === NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}

	# (T10068) Allow control over whether robots index a page.
	# 'index' is processed last, so __INDEX__ always overrides
	# __NOINDEX__, see T16899
	$indexPolicies = [
		'noindex' => 'noindex-category',
		'index' => 'index-category',
	];
	foreach ( $indexPolicies as $policy => $trackingCategory ) {
		if ( isset( $this->mDoubleUnderscores[$policy] ) && $this->getTitle()->canUseNoindex() ) {
			$this->mOutput->setIndexPolicy( $policy );
			$this->addTrackingCategory( $trackingCategory );
		}
	}

	# Cache all double underscores in the database
	foreach ( array_keys( $this->mDoubleUnderscores ) as $key ) {
		$this->mOutput->setUnsortedPageProperty( $key );
	}

	return $text;
}
4219
/**
 * Add a tracking category to the current parser output, for the page
 * being parsed.
 *
 * @param string $msg Identifier handed to the tracking categories service
 * @return mixed Whatever the tracking categories service returns
 */
public function addTrackingCategory( $msg ) {
	$output = $this->mOutput;
	$page = $this->getPage();

	return $this->trackingCategories->addTrackingCategory( $output, $msg, $page );
}
4231
/**
 * Build a Message in the parser's target language, with the page being
 * parsed as its page context.
 *
 * @param string $msg Message key
 * @param mixed ...$params Message parameters, forwarded to wfMessage()
 * @return Message
 */
public function msg( string $msg, ...$params ): Message {
	$message = wfMessage( $msg, ...$params );
	$message = $message->inLanguage( $this->getTargetLanguage() );

	return $message->page( $this->getPage() );
}
4252
/**
 * Reduce the contents of a heading to what may be shown in a TOC line.
 *
 * Walks the children of $container in document order and modifies the DOM
 * in place:
 *  - <style> and <script> elements are removed with their contents;
 *  - allowed elements are kept, stripped of their attributes (except
 *    dir="rtl"/"ltr" on <span>), and cleaned recursively;
 *  - any other element is unwrapped: its children are moved in its place
 *    and are then visited by this same loop;
 *  - comments are removed.
 *
 * @param Node $container Element or document fragment to clean
 */
private function cleanUpTocLine( Node $container ) {
	'@phan-var Element|DocumentFragment $container';  // @var Element|DocumentFragment $container
	# Strip out HTML
	# Allowed tags are:
	# * <sup> and <sub> (T10393)
	# * <i> (T28375)
	# * <b> (r105284)
	# * <bdi> (T74884)
	# * <span dir="rtl"> and <span dir="ltr"> (T37167)
	# * <s> and <strike> (T35715)
	# * <q> (T251672)
	# We strip any parameter from accepted tags, except dir="rtl|ltr" from <span>,
	# to allow setting directionality in toc items.
	$allowedTags = [ 'span', 'sup', 'sub', 'bdi', 'i', 'b', 's', 'strike', 'q' ];
	$node = $container->firstChild;
	while ( $node !== null ) {
		// Remember the successor now: $node may be detached below
		$next = $node->nextSibling;
		if ( $node instanceof Element ) {
			$nodeName = DOMCompat::nodeName( $node );
			if ( in_array( $nodeName, [ 'style', 'script' ], true ) ) {
				# Remove any <style> or <script> tags (T198618)
				DOMCompat::remove( $node );
			} elseif ( in_array( $nodeName, $allowedTags, true ) ) {
				// Keep tag, remove attributes.
				// Collect first, remove afterwards, so that the attribute
				// list is not modified while it is being iterated.
				$removeAttrs = [];
				foreach ( $node->attributes as $attr ) {
					if (
						$nodeName === 'span' && $attr->name === 'dir'
						&& ( $attr->value === 'rtl' || $attr->value === 'ltr' )
					) {
						// Keep <span dir="rtl"> and <span dir="ltr">
						continue;
					}
					$removeAttrs[] = $attr;
				}
				foreach ( $removeAttrs as $attr ) {
					$node->removeAttributeNode( $attr );
				}
				$this->cleanUpTocLine( $node );
				# Strip '<span></span>', which is the result from the above if
				# <span id="foo"></span> is used to produce an additional anchor
				# for a section.
				if ( $nodeName === 'span' && !$node->hasChildNodes() ) {
					DOMCompat::remove( $node );
				}
			} else {
				// Strip tag: hoist the children into the parent and continue
				// with the first of them. An element without children (such
				// as <br>) has nothing to hoist; in that case carry on with
				// the sibling saved above, otherwise the walk would stop here
				// and leave the rest of the heading uncleaned.
				$next = $node->firstChild ?? $next;
				// phpcs:ignore Generic.CodeAnalysis.AssignmentInCondition.FoundInWhileCondition
				while ( $childNode = $node->firstChild ) {
					$node->parentNode->insertBefore( $childNode, $node );
				}
				DOMCompat::remove( $node );
			}
		} elseif ( $node instanceof Comment ) {
			// Extensions may add comments to headings;
			// these shouldn't appear in the ToC either.
			DOMCompat::remove( $node );
		}
		$node = $next;
	}
}
4315
/**
 * Post-process the headings of parsed text.
 *
 * For every <h1>..<h6> found in $text this computes the TOC entry (line,
 * anchors, numbering level, codepoint offset into $origText), builds the
 * edit-section placeholder, and rewrites the heading tag with
 * data-mw-anchor / data-mw-fallback-anchor attributes. It also applies the
 * double-underscore switches that affect sections and the TOC, and records
 * the TOC data and TOC flags on the parser output.
 *
 * @param string $text Half-parsed HTML containing the headings
 * @param string $origText Original wikitext; preprocessed here to compute
 *   section offsets
 * @param bool $isMain When false, nothing TOC-related is stored on the
 *   parser output and no TOC placeholder is added
 * @return string $text with the rebuilt headings (and possibly a TOC
 *   placeholder appended to the lead section)
 */
private function finalizeHeadings( $text, $origText, $isMain = true ) {
	# Inhibit editsection links if requested in the page
	if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
		$maybeShowEditLink = false;
	} else {
		$maybeShowEditLink = true; /* Actual presence will depend on post-cache transforms */
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# NOTE: white space in headings have been trimmed in handleHeadings. They shouldn't
	# be trimmed here since whitespace in HTML headings is significant.
	$matches = [];
	$numMatches = preg_match_all(
		'/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
		$text,
		$matches
	);

	# if there are fewer than 4 headlines in the article, do not show TOC
	# unless it's been explicitly enabled.
	$enoughToc = $this->mShowToc &&
		( ( $numMatches >= 4 ) || $this->mForceTocPosition );

	# Allow user to stipulate that a page should have a "new section"
	# link added via __NEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
		$this->mOutput->setNewSection( true );
	}

	# Allow user to remove the "new section"
	# link via __NONEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
		$this->mOutput->setHideNewSection( true );
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC above first header
	if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
		$this->mShowToc = true;
		$enoughToc = true;
	}

	if ( !$numMatches ) {
		return $text;
	}

	# headline counter
	$headlineCount = 0;
	$haveTocEntries = false;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$head = [];
	$level = 0;
	$tocData = new TOCData();
	$baseTitleText = $this->getTitle()->getPrefixedDBkey();
	# The original wikitext is walked in OT_WIKI mode to find section
	# offsets; the previous output type is restored after the loop.
	$oldType = $this->mOutputType;
	$this->setOutputType( self::OT_WIKI );
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $origText );
	$node = $root->getFirstChild();
	$cpOffset = 0;
	# Lower-cased anchors already handed out, used to de-duplicate IDs
	$refers = [];

	$maxTocLevel = $this->svcOptions->get( MainConfigNames::MaxTocLevel );
	$domDocument = DOMUtils::parseHTML( '' );
	foreach ( $matches['header'] as $headline ) {
		// $headline is half-parsed HTML
		$isTemplate = false;
		$titleText = false;
		$sectionIndex = false;
		if ( preg_match( self::HEADLINE_MARKER_REGEX, $headline, $markerMatches ) ) {
			$serial = (int)$markerMatches[1];
			[ $titleText, $sectionIndex ] = $this->mHeadings[$serial];
			// Compare as strings: a loose comparison would treat
			// numeric-looking titles such as "1e3" and "1000" as equal.
			$isTemplate = ( (string)$titleText !== $baseTitleText );
			$headline = ltrim( substr( $headline, strlen( $markerMatches[0] ) ) );
		}

		$sectionMetadata = SectionMetadata::fromLegacy( [
			"fromtitle" => $titleText ?: null,
			"index" => $sectionIndex === false
				? '' : ( ( $isTemplate ? 'T-' : '' ) . $sectionIndex )
		] );
		$tocData->addSection( $sectionMetadata );

		$oldLevel = $level;
		$level = (int)$matches[1][$headlineCount];
		$tocData->processHeading( $oldLevel, $level, $sectionMetadata );

		if ( $tocData->getCurrentTOCLevel() < $maxTocLevel ) {
			$haveTocEntries = true;
		}

		# Remove link placeholders by the link text.
		#     <!--LINK number-->
		# turns into
		#     link text with suffix
		# Do this before unstrip since link text can contain strip markers
		$fullyParsedHeadline = $this->replaceLinkHoldersText( $headline );

		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$fullyParsedHeadline = $this->mStripState->unstripBoth( $fullyParsedHeadline );

		// Run Tidy to convert wikitext entities to HTML entities (T355386),
		// conveniently also giving us a way to handle French spaces (T324763)
		$fullyParsedHeadline = $this->tidy->tidy( $fullyParsedHeadline, [ Sanitizer::class, 'armorFrenchSpaces' ] );

		// Wrap the safe headline to parse the heading attributes
		// Literal HTML tags should be sanitized at this point
		// cleanUpTocLine will strip the headline tag
		$wrappedHeadline = "<h$level" . $matches['attrib'][$headlineCount] . $fullyParsedHeadline . "</h$level>";

		// Parse the heading contents as HTML. This makes it easier to strip out some HTML tags,
		// and ensures that we generate balanced HTML at the end (T218330).
		$headlineDom = DOMUtils::parseHTMLToFragment( $domDocument, $wrappedHeadline );

		// Extract a user defined id on the heading
		// A heading is expected as the first child and could be asserted
		$h = $headlineDom->firstChild;
		$headingId = ( $h instanceof Element && DOMUtils::isHeading( $h ) ) ?
			DOMCompat::getAttribute( $h, 'id' ) : null;

		$this->cleanUpTocLine( $headlineDom );

		// Serialize back to HTML
		// $tocline is for the TOC display, fully-parsed HTML with some tags removed
		$tocline = trim( DOMUtils::getFragmentInnerHTML( $headlineDom ) );

		// $headlineText is for the "Edit section: $1" tooltip, plain text
		$headlineText = trim( $headlineDom->textContent );

		if ( $headingId === null || $headingId === '' ) {
			$headingId = Sanitizer::normalizeSectionNameWhitespace( $headlineText );
			$headingId = self::normalizeSectionName( $headingId );
		}

		# Create the anchor for linking from the TOC to the section
		$fallbackAnchor = Sanitizer::escapeIdForAttribute( $headingId, Sanitizer::ID_FALLBACK );
		$linkAnchor = Sanitizer::escapeIdForLink( $headingId );
		$anchor = Sanitizer::escapeIdForAttribute( $headingId, Sanitizer::ID_PRIMARY );
		if ( $fallbackAnchor === $anchor ) {
			# No reason to have both (in fact, we can't)
			$fallbackAnchor = false;
		}

		# HTML IDs must be case-insensitively unique for IE compatibility (T12721).
		$arrayKey = strtolower( $anchor );
		if ( $fallbackAnchor === false ) {
			$fallbackArrayKey = false;
		} else {
			$fallbackArrayKey = strtolower( $fallbackAnchor );
		}

		if ( isset( $refers[$arrayKey] ) ) {
			for ( $i = 2; isset( $refers["{$arrayKey}_$i"] ); ++$i );
			$anchor .= "_$i";
			$linkAnchor .= "_$i";
			$refers["{$arrayKey}_$i"] = true;
		} else {
			$refers[$arrayKey] = true;
		}
		# Only register a fallback key when there is a fallback anchor.
		# Using false as an array key would be cast to 0 and make a later
		# heading whose anchor is "0" look like a duplicate.
		if ( $fallbackAnchor !== false ) {
			if ( isset( $refers[$fallbackArrayKey] ) ) {
				for ( $i = 2; isset( $refers["{$fallbackArrayKey}_$i"] ); ++$i );
				$fallbackAnchor .= "_$i";
				$refers["{$fallbackArrayKey}_$i"] = true;
			} else {
				$refers[$fallbackArrayKey] = true;
			}
		}

		# Add the section to the section tree
		# Find the DOM node for this header
		$noOffset = ( $isTemplate || $sectionIndex === false );
		while ( $node && !$noOffset ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					break;
				}
			}
			$cpOffset += mb_strlen(
				$this->mStripState->unstripBoth(
					$frame->expand( $node, PPFrame::RECOVER_ORIG )
				)
			);
			$node = $node->getNextSibling();
		}
		$sectionMetadata->line = $tocline;
		$sectionMetadata->codepointOffset = ( $noOffset ? null : $cpOffset );
		$sectionMetadata->anchor = $anchor;
		$sectionMetadata->linkAnchor = $linkAnchor;

		if ( $maybeShowEditLink && $sectionIndex !== false ) {
			// Output edit section links as markers with styles that can be customized by skins
			if ( $isTemplate ) {
				# Put a T flag in the section identifier, to indicate to extractSections()
				# that sections inside <includeonly> should be counted.
				$editsectionPage = $titleText;
				$editsectionSection = "T-$sectionIndex";
			} else {
				$editsectionPage = $this->getTitle()->getPrefixedText();
				$editsectionSection = $sectionIndex;
			}
			// Construct a pseudo-HTML tag as a placeholder for the section edit link. It is replaced in
			// MediaWiki\OutputTransform\Stages\HandleSectionLinks with the real link.
			//
			// Any HTML markup in the input has already been escaped,
			// so we don't have to worry about a user trying to input one of these markers directly.
			//
			// We put the page and section in attributes to stop the language converter from
			// converting them, but put the headline hint in tag content
			// because it is supposed to be able to convert that.
			$editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage, ENT_COMPAT );
			$editlink .= '" section="' . htmlspecialchars( $editsectionSection, ENT_COMPAT ) . '"';
			$editlink .= '>' . htmlspecialchars( $headlineText ) . '</mw:editsection>';
		} else {
			$editlink = '';
		}
		// Reconstruct the original <h#> tag with added attributes. It is replaced in
		// MediaWiki\OutputTransform\Stages\HandleSectionLinks to add anchors and stuff.
		//
		// data-mw-... attributes are forbidden in Sanitizer::isReservedDataAttribute(),
		// so we don't have to worry about a user trying to input one of these markers directly.
		//
		// We put the anchors in attributes to stop the language converter from converting them.
		$head[$headlineCount] = "<h$level" . Html::expandAttributes( [
			'data-mw-anchor' => $anchor,
			'data-mw-fallback-anchor' => $fallbackAnchor,
		] ) . $matches['attrib'][$headlineCount] . $headline . $editlink . "</h$level>";

		$headlineCount++;
	}

	$this->setOutputType( $oldType );

	# Never ever show TOC if no headers (or suppressed)
	$suppressToc = $this->mOptions->getSuppressTOC();
	if ( !$haveTocEntries ) {
		$enoughToc = false;
	}
	$addTOCPlaceholder = false;

	if ( $isMain && !$suppressToc ) {
		// We generally output the section information via the API
		// even if there isn't "enough" of a ToC to merit showing
		// it -- but the "suppress TOC" parser option is set when
		// any sections that might be found aren't "really there"
		// (ie, JavaScript content that might have spurious === or
		// <h2>: T307691) so we will *not* set section information
		// in that case.
		$this->mOutput->setTOCData( $tocData );

		// T294950: Record a suggestion that the TOC should be shown.
		// Skins are free to ignore this suggestion and implement their
		// own criteria for showing/suppressing TOC (T318186).
		if ( $enoughToc ) {
			$this->mOutput->setOutputFlag( ParserOutputFlags::SHOW_TOC );
			if ( !$this->mForceTocPosition ) {
				$addTOCPlaceholder = true;
			}
		}

		// If __NOTOC__ is used on the page (and not overridden by
		// __TOC__ or __FORCETOC__) set the NO_TOC flag to tell
		// the skin that although the section information is
		// valid, it should perhaps not be presented as a Table Of
		// Contents.
		if ( !$this->mShowToc ) {
			$this->mOutput->setOutputFlag( ParserOutputFlags::NO_TOC );
		}
	}

	# split up and insert constructed headlines
	$blocks = preg_split( '/<h[1-6]\b[^>]*>.*?<\/h[1-6]>/is', $text );
	$i = 0;

	// build an array of document sections
	$sections = [];
	foreach ( $blocks as $block ) {
		// $head is zero-based, sections aren't.
		if ( empty( $head[$i - 1] ) ) {
			$sections[$i] = $block;
		} else {
			$sections[$i] = $head[$i - 1] . $block;
		}

		$i++;
	}

	if ( $addTOCPlaceholder ) {
		// The placeholder is appended to the lead section, i.e. it ends up
		// just before the first heading.
		// Top anchor now in skin
		// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset At least one element when enoughToc is true
		$sections[0] .= self::TOC_PLACEHOLDER . "\n";
	}

	return implode( '', $sections );
}
4629
/**
 * Localize the heading text and the section numbering of TOC data, in place.
 *
 * @param ?TOCData $tocData TOC data to modify; null means nothing to do
 * @param Language $lang Language used to format the section numbers
 * @param ?ILanguageConverter $converter If given, used to convert each
 *   heading line to the converter's preferred variant
 */
private static function localizeTOC(
	?TOCData $tocData, Language $lang, ?ILanguageConverter $converter
) {
	if ( $tocData === null ) {
		return; // Nothing to do
	}

	$separator = '.';
	foreach ( $tocData->getSections() as $section ) {
		// Localize heading
		if ( $converter ) {
			// T331316: the third argument is deliberately false here.
			// NOTE(review): presumably this keeps convertTo() from
			// resetting the language converter state - confirm against
			// ILanguageConverter::convertTo().
			$section->line = $converter->convertTo(
				$section->line, $converter->getPreferredVariant(), false
			);
		}

		// Localize numbering: format each dot-separated component on its own
		$localized = [];
		foreach ( explode( $separator, $section->number ) as $component ) {
			$localized[] = $lang->formatNum( $component );
		}
		$section->number = implode( $separator, $localized );
	}
}
4667
/**
 * Transform wikitext for saving: strip NUL characters, normalize line
 * endings, optionally run the pre-save pass (signatures, subst:, pipe
 * tricks), and unstrip.
 *
 * @param string $text Wikitext to transform
 * @param PageReference $page
 * @param UserIdentity $user User whose signature is used
 * @param ParserOptions $options The pre-save pass only runs when
 *   getPreSaveTransform() is true
 * @param bool $clearState When true the parser is locked for the duration
 *   of the call and its state is cleared by startParse()
 * @return string The transformed text
 */
public function preSaveTransform(
	$text,
	PageReference $page,
	UserIdentity $user,
	ParserOptions $options,
	$clearState = true
) {
	if ( $clearState ) {
		// Held in a local so that the lock lasts until this method returns
		$magicScopeVariable = $this->lock();
	}
	$this->startParse( $page, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	// Strip U+0000 NULL (T159174), then normalize line endings (including
	// trimming trailing whitespace). The latter is kept for
	// backwards-compatibility with other code that just calls PST, but
	// should already be handled in TextContent subclasses.
	$text = TextContent::normalizeLineEndings( str_replace( "\000", '', $text ) );

	if ( $options->getPreSaveTransform() ) {
		$text = $this->pstPass2( $text, $user );
	}

	// Unstrip, and trim trailing whitespace again, because the previous
	// steps can introduce it.
	$text = rtrim( $this->mStripState->unstripBoth( $text ) );

	$this->hookRunner->onParserPreSaveTransformComplete( $this, $text );

	# Reset
	$this->setUser( null );

	return $text;
}
4715
/**
 * Pre-save transform helper: substitutes variables, expands signatures
 * (~~~, ~~~~, ~~~~~) and applies the "pipe trick" to links of the form
 * [[name (context)|]] and [[|name]].
 *
 * @param string $text
 * @param UserIdentity $user User whose signature is inserted
 * @return string
 */
private function pstPass2( $text, UserIdentity $user ) {
	# Note: This is the timestamp saved as hardcoded wikitext to the database, we use
	# $this->contLang here in order to give everyone the same signature and use the default one
	# rather than the one selected in each user's preferences. (see also T14815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

	$d = $this->contLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call.

	# Signatures
	if ( str_contains( $text, '~~~' ) ) {
		$sigText = $this->getUserSig( $user );
		# strtr() with an array tries the longest key first, so five tildes
		# are not consumed as four or three.
		$text = strtr( $text, [
			'~~~~~' => $d,
			'~~~~' => "$sigText $d",
			'~~~' => $sigText
		] );
		# The main two signature forms used above are time-sensitive
		$this->setOutputFlag( ParserOutputFlags::USER_SIGNATURE, 'User signature detected' );
	}

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	# The full-width and Arabic punctuation below is written as \u{...}
	# escapes so that it cannot be silently folded to ASCII by text
	# normalization. An ASCII "(" in $p4 would turn the literal bracket
	# into a capture group and make the pattern match almost any [[x|]].
	#   \u{FF08} = FULLWIDTH LEFT PARENTHESIS,  \u{FF09} = FULLWIDTH RIGHT PARENTHESIS
	#   \u{FF0C} = FULLWIDTH COMMA,             \u{060C} = ARABIC COMMA

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page（context）|]] (double-width brackets, added in r40257)
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\u{FF08}$tc+\u{FF09})\\|]]/";
	// [[ns:page (context), context|]] (using single, double-width or Arabic comma)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |\u{FF0C}|\u{060C} )$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	# Reverse pipe trick: the context is taken from the current page title
	$t = $this->getTitle()->getText();
	$m = [];
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	return $text;
}
4785
/**
 * Build the wikitext for a user's signature.
 *
 * A custom ("fancy") signature is returned, after cleanSig(), when it is
 * non-empty, within the configured maximum length, and passes validation.
 * In every other case the result is the default signature message linking
 * to the user, using the nickname (or the user name as fallback) as text.
 *
 * @param UserIdentity $user
 * @param string|false $nickname Nickname to use; false to read the user's
 *   'nickname' option
 * @param bool|null $fancySig Whether the nickname is a complete signature;
 *   null to read the user's 'fancysig' option
 * @return string
 */
public function getUserSig( UserIdentity $user, $nickname = false, $fancySig = null ) {
	$username = $user->getName();

	# Fill in whatever the caller did not supply from the user's options
	if ( $nickname === false ) {
		$nickname = $this->userOptionsLookup->getOption( $user, 'nickname' );
	}
	$fancySig ??= $this->userOptionsLookup->getBoolOption( $user, 'fancysig' );

	$hasNickname = ( $nickname !== null && $nickname !== '' );
	if ( !$hasNickname ) {
		// Empty value results in the default signature (even when fancysig is enabled)
		$nickname = $username;
	} elseif ( mb_strlen( $nickname ) > $this->svcOptions->get( MainConfigNames::MaxSigChars ) ) {
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has overlong signature." );
	} elseif ( $fancySig !== false ) {
		# Sig. might contain markup; validate this
		$isValid = ( $this->validateSig( $nickname ) !== false );

		# New validator, only consulted in 'disallow' mode
		if ( $isValid
			&& $this->svcOptions->get( MainConfigNames::SignatureValidation ) === 'disallow'
		) {
			$validator = $this->signatureValidatorFactory->newSignatureValidator(
				$user,
				null,
				new ParserOptions( $this->mOptions->getUserIdentity(), $this->contLang )
			);
			# A truthy result from the validator means the signature is rejected
			$isValid = !$validator->validateSignature( $nickname );
		}

		if ( $isValid ) {
			# Validated; clean up (if needed) and return it
			return $this->cleanSig( $nickname, true );
		}

		# Failed to validate; fall back to the default
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has invalid signature." );
	}

	# Make sure nickname doesnt get a sig in a sig
	$nickname = self::cleanSigInSig( $nickname );

	# If we're still here, make it a link to the user page
	$userText = wfEscapeWikiText( $username );
	$nickText = wfEscapeWikiText( $nickname );

	$msgName = 'signature-anon';
	if ( $this->userNameUtils->isTemp( $username ) ) {
		$msgName = 'signature-temp';
	} elseif ( $user->isRegistered() ) {
		$msgName = 'signature';
	}

	$message = wfMessage( $msgName, $userText, $nickText )->inContentLanguage();

	return $message->page( $this->getPage() )->text();
}
4860
/**
 * Check that a signature is a well-formed XML fragment.
 *
 * @param string $text Signature text to check
 * @return string|false The unchanged text when it is well-formed, false otherwise
 */
public function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}

	return false;
}
4871
/**
 * Clean up signature text.
 *
 * Unless the parser options disable signature cleaning, every "{{" that
 * is not already followed by the 'subst' magic word gets it inserted,
 * signature tildes are removed (see cleanSigInSig()), and the result is
 * run through the preprocessor.
 *
 * @param string $text Signature text
 * @param bool $parsing True when called from within an ongoing parse. When
 *   false, the parser is locked and a new OT_PREPROCESS parse is started
 *   here, and strip markers are expanded in the result.
 * @return string Signature text, returned unchanged when cleaning is disabled
 */
public function cleanSig( $text, $parsing = false ) {
	if ( !$parsing ) {
		// Held in a local so the lock lasts until this method returns.
		$scopedLock = $this->lock();
		$userOptions = ParserOptions::newFromUser( RequestContext::getMain()->getUser() );
		$this->startParse( $this->mTitle, $userOptions, self::OT_PREPROCESS, true );
	}

	# The feature can be switched off through the parser options
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	$substWord = $this->magicWordFactory->get( 'subst' );
	$substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase();
	$substText = '{{' . $substWord->getSynonym( 0 );

	$text = self::cleanSigInSig( preg_replace( $substRegex, $substText, $text ) );

	$dom = $this->preprocessToDom( $text );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom );

	return $parsing ? $text : $this->mStripState->unstripBoth( $text );
}
4917
/**
 * Remove runs of three to five tildes from the given text, so that a
 * signature cannot itself contain signature markup.
 *
 * @param string $text
 * @return string Text with every match of ~{3,5} removed
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
4929
/**
 * Replace the table of contents marker in HTML.
 *
 * The marker is a <meta> element whose "property" attribute is
 * "mw:PageProp/toc". The first marker is replaced by $toc; any further
 * markers are replaced by the empty string.
 *
 * @param string $text HTML containing the marker(s)
 * @param string $toc Replacement for the first marker (used as outerHTML)
 * @return string Result of HtmlHelper::modifyElements()
 */
public static function replaceTableOfContentsMarker( $text, $toc ) {
	$tocInserted = false;

	$isTocMarker = static function ( SerializerNode $node ): bool {
		return $node->name === 'meta'
			&& ( $node->attrs['property'] ?? '' ) === 'mw:PageProp/toc';
	};

	$substitute = static function ( SerializerNode $node ) use ( &$tocInserted, $toc ) {
		// Only the first marker receives the TOC. Dropping the extra
		// metas is not strictly necessary, but it keeps the pass
		// idempotent if it is run more than once on the same content.
		$outerHtml = $tocInserted ? '' : $toc;
		$tocInserted = true;
		return $outerHtml;
	};

	return HtmlHelper::modifyElements(
		$text,
		$isTocMarker,
		$substitute,
		false /* use legacy-compatible serialization */
	);
}
4967
/**
 * Set up the parser state for a parse driven from outside this class.
 *
 * @param ?PageReference $page Page being parsed
 * @param ParserOptions $options
 * @param int $outputType Passed on to startParse()
 * @param bool $clearState Whether to reset the parser state first
 * @param int|null $revId Stored as the current revision ID when not null
 */
public function startExternalParse( ?PageReference $page, ParserOptions $options,
	$outputType, $clearState = true, $revId = null
) {
	$this->startParse( $page, $options, $outputType, $clearState );

	if ( $revId === null ) {
		return;
	}
	$this->mRevisionId = $revId;
}
4987
/**
 * Store the page, options and output type for a new parse, optionally
 * resetting the parser state afterwards.
 *
 * @param ?PageReference $page
 * @param ParserOptions $options
 * @param int $outputType
 * @param bool $clearState Call clearState() once everything is stored
 */
private function startParse( ?PageReference $page, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->setPage( $page );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
5004
/**
 * Preprocess the text of an interface message.
 *
 * A static flag guards against re-entrance: if this method is called again
 * while a transformation is still running, the text is returned unchanged.
 *
 * @param string $text Message text
 * @param ParserOptions $options
 * @param ?PageReference $page Page used as context; defaults to the
 *   parser's current title
 * @return string Preprocessed text, or the unchanged input on a nested call
 */
public function transformMsg( $text, ParserOptions $options, ?PageReference $page = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	try {
		return $this->preprocess( $text, $page ?? $this->mTitle, $options );
	} finally {
		# Release the guard even when preprocess() throws; otherwise every
		# later call in this request would return its input unprocessed.
		$executing = false;
	}
}
5028
/**
 * Register a callback for an extension tag.
 *
 * The tag name is lower-cased, stored in the tag hook registry and added
 * to the strip list if it is not already there.
 *
 * @param string $tag Tag name; must not contain '<', '>', CR or LF
 * @param callable $callback Callback to register for the tag
 * @return callable|null The callback previously registered for the tag, if any
 * @throws InvalidArgumentException If the tag name contains a forbidden character
 */
public function setHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new InvalidArgumentException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}
	$oldVal = $this->mTagHooks[$tag] ?? null;
	$this->mTagHooks[$tag] = $callback;
	// Strict comparison: a loose in_array() treats numeric-looking names
	// such as '1e1' and '10' as equal and would skip a distinct tag.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $oldVal;
}
5061
/**
 * Forget every registered tag hook and empty the strip list.
 */
public function clearTagHooks() {
	$this->mStripList = [];
	$this->mTagHooks = [];
}
5070
/**
 * Register a parser function callback under a magic word ID.
 *
 * Every synonym of the magic word is entered in the function synonym
 * table: lower-cased in the content language when the magic word is not
 * case-sensitive, prefixed with '#' unless SFH_NO_HASH is set, and with
 * one trailing colon removed.
 *
 * @param string $id Magic word ID
 * @param callable $callback Callback to register
 * @param int $flags Bit field of SFH_* constants; stored with the callback
 * @return callable|null The callback previously registered under $id, if any
 */
public function setFunctionHook( $id, callable $callback, $flags = 0 ) {
	$previous = $this->mFunctionHooks[$id][0] ?? null;
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Fill the synonym lookup table
	$magicWord = $this->magicWordFactory->get( $id );
	$synonyms = $magicWord->getSynonyms();
	// 0 or 1; doubles as the first-level key of the synonym table.
	$sensitive = (int)$magicWord->isCaseSensitive();
	$wantsHash = !( $flags & self::SFH_NO_HASH );

	foreach ( $synonyms as $synonym ) {
		if ( !$sensitive ) {
			$synonym = $this->contLang->lc( $synonym );
		}
		if ( $wantsHash ) {
			$synonym = '#' . $synonym;
		}
		# Drop a trailing colon
		if ( substr( $synonym, -1 ) === ':' ) {
			$synonym = substr( $synonym, 0, -1 );
		}
		$this->mFunctionSynonyms[$sensitive][$synonym] = $id;
	}

	return $previous;
}
5146
/**
 * List the IDs under which parser function hooks are registered.
 *
 * @return array Keys of the function hook registry
 */
public function getFunctionHooks() {
	$hookIds = array_keys( $this->mFunctionHooks );

	return $hookIds;
}
5156
/**
 * Replace link placeholders in the given text, in place, using the
 * parser's current link holder collection.
 *
 * @param string &$text Text to process; modified by reference
 */
public function replaceLinkHolders( &$text ) {
	$this->mLinkHolders->replace( $text );
}
5167
/**
 * Replace link placeholders in the given text, in place, using the
 * parser's current link holder collection.
 *
 * @param string &$text Text to process; modified by reference
 */
private function replaceLinkHoldersPrivate( &$text ) {
	$holders = $this->mLinkHolders;
	$holders->replace( $text );
}
5177
/**
 * Run the link holder collection's replaceText() on the given text.
 *
 * @param string $text
 * @return string Whatever replaceText() returns for $text
 */
private function replaceLinkHoldersText( $text ) {
	$holders = $this->mLinkHolders;

	return $holders->replaceText( $text );
}
5188
/**
 * Render an image gallery from its line-based body.
 *
 * Each line of $text is expected to look like "Title|option|...|caption".
 * Lines that do not match, and lines whose title cannot be parsed, are
 * skipped. The gallery object comes from ImageGalleryBase::factory() for
 * $params['mode']; if that class is not found, the default mode is used.
 *
 * Keys of $params read directly here: mode, showfilename, caption, perrow,
 * widths, heights. The whole array is also passed to
 * Sanitizer::validateTagAttributes() and to setAdditionalOptions().
 *
 * @param string $text Gallery body, one image per line
 * @param array $params Gallery options (presumably the attributes of the
 *   gallery tag; confirm against callers)
 * @return string HTML from the gallery's toHTML(), passed through the
 *   onAfterParserFetchFileAndTitle hook before being returned
 */
5203 public function renderImageGallery( $text, array $params ) {
5204 $mode = false;
5205 if ( isset( $params['mode'] ) ) {
5206 $mode = $params['mode'];
5207 }
5208
5209 try {
5210 $ig = ImageGalleryBase::factory( $mode );
5211 } catch ( ImageGalleryClassNotFoundException $e ) {
5212 // If invalid type set, fallback to default.
5213 $ig = ImageGalleryBase::factory( false );
5214 }
5215
5216 $ig->setContextTitle( $this->getTitle() );
5217 $ig->setShowBytes( false );
5218 $ig->setShowDimensions( false );
5219 $ig->setShowFilename( false );
5220 $ig->setParser( $this );
5221 $ig->setHideBadImages();
5222 $ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'ul' ) );
5223
// NOTE(review): setShowFilename( false ) was already called above, so the
// else branch below repeats that call.
5224 if ( isset( $params['showfilename'] ) ) {
5225 $ig->setShowFilename( true );
5226 } else {
5227 $ig->setShowFilename( false );
5228 }
5229 if ( isset( $params['caption'] ) ) {
5230 // NOTE: We aren't passing a frame here or below. Frame info
5231 // is currently opaque to Parsoid, which acts on OT_PREPROCESS.
5232 // See T107332#4030581
5233 $caption = $this->recursiveTagParse( $params['caption'] );
5234 $ig->setCaptionHtml( $caption );
5235 }
5236 if ( isset( $params['perrow'] ) ) {
5237 $ig->setPerRow( $params['perrow'] );
5238 }
5239 if ( isset( $params['widths'] ) ) {
5240 $ig->setWidths( $params['widths'] );
5241 }
5242 if ( isset( $params['heights'] ) ) {
5243 $ig->setHeights( $params['heights'] );
5244 }
5245 $ig->setAdditionalOptions( $params );
5246
5247 $enableLegacyMediaDOM = $this->svcOptions->get( MainConfigNames::ParserEnableLegacyMediaDOM );
5248
5249 $lines = StringUtils::explode( "\n", $text );
5250 foreach ( $lines as $line ) {
5251 # match lines like these:
5252 # Image:someimage.jpg|This is some image
5253 $matches = [];
5254 preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
5255 # Skip empty lines
5256 if ( count( $matches ) == 0 ) {
5257 continue;
5258 }
5259
// The '%' test looks at the whole matched line, but only the title part
// ($matches[1]) is percent-decoded.
5260 if ( strpos( $matches[0], '%' ) !== false ) {
5261 $matches[1] = rawurldecode( $matches[1] );
5262 }
5263 $title = Title::newFromText( $matches[1], NS_FILE );
5264 if ( $title === null ) {
5265 # Bogus title. Ignore these so we don't bomb out later.
5266 continue;
5267 }
5268
5269 # We need to get what handler the file uses, to figure out parameters.
5270 # Note, a hook can override the file name, and chose an entirely different
5271 # file (which potentially could be of a different type and have different handler).
5272 $options = [];
5273 $descQuery = false;
5274 $this->hookRunner->onBeforeParserFetchFileAndTitle(
5275 // @phan-suppress-next-line PhanTypeMismatchArgument Type mismatch on pass-by-ref args
5276 $this, $title, $options, $descQuery
5277 );
5278 # Don't register it now, as TraditionalImageGallery does that later.
5279 $file = $this->fetchFileNoRegister( $title, $options );
5280 $handler = $file ? $file->getHandler() : false;
5281
5282 $paramMap = [
5283 'img_alt' => 'gallery-internal-alt',
5284 'img_link' => 'gallery-internal-link',
5285 ];
5286 if ( $handler ) {
// Array union: the two gallery-internal entries above win over any
// handler entry that uses the same key.
5287 $paramMap += $handler->getParamMap();
5288 // We don't want people to specify per-image widths.
5289 // Additionally the width parameter would need special casing anyhow.
5290 unset( $paramMap['img_width'] );
5291 }
5292
5293 $mwArray = $this->magicWordFactory->newArray( array_keys( $paramMap ) );
5294
5295 $label = '';
5296 $alt = null;
5297 $handlerOptions = [];
5298 $imageOptions = [];
5299 $hasAlt = false;
5300
5301 if ( isset( $matches[3] ) ) {
5302 // look for an |alt= definition while trying not to break existing
5303 // captions with multiple pipes (|) in it, until a more sensible grammar
5304 // is defined for images in galleries
5305
5306 // FIXME: Doing recursiveTagParse at this stage is a bit odd,
5307 // and different from makeImage.
5308 $matches[3] = $this->recursiveTagParse( $matches[3] );
5309 // Protect LanguageConverter markup
5310 $parameterMatches = StringUtils::delimiterExplode(
5311 '-{', '}-',
5312 '|',
5313 $matches[3],
5314 true /* nested */
5315 );
5316
5317 foreach ( $parameterMatches as $parameterMatch ) {
5318 [ $magicName, $match ] = $mwArray->matchVariableStartToEnd( trim( $parameterMatch ) );
5319 if ( !$magicName ) {
5320 // Last pipe wins.
5321 $label = $parameterMatch;
5322 continue;
5323 }
5324
5325 $paramName = $paramMap[$magicName];
5326 switch ( $paramName ) {
5327 case 'gallery-internal-alt':
5328 $hasAlt = true;
5329 $alt = $this->stripAltText( $match, false );
5330 break;
5331 case 'gallery-internal-link':
5332 $linkValue = $this->stripAltText( $match, false );
5333 if ( preg_match( '/^-{R\|(.*)}-$/', $linkValue ) ) {
5334 // Result of LanguageConverter::markNoConversion
5335 // invoked on an external link.
5336 $linkValue = substr( $linkValue, 4, -2 );
5337 }
5338 [ $type, $target ] = $this->parseLinkParameter( $linkValue );
5339 if ( $type ) {
5340 if ( $type === 'no-link' ) {
5341 $target = true;
5342 }
5343 $imageOptions[$type] = $target;
5344 }
5345 break;
5346 default:
5347 // Must be a handler specific parameter.
// $handler is set here: other map entries are only added when it is.
5348 if ( $handler->validateParam( $paramName, $match ) ) {
5349 $handlerOptions[$paramName] = $match;
5350 } else {
5351 // Guess not, consider it as caption.
5352 $this->logger->debug(
5353 "$parameterMatch failed parameter validation" );
5354 $label = $parameterMatch;
5355 }
5356 }
5357 }
5358 }
5359
5360 // Match makeImage when !$hasVisibleCaption
5361 if ( !$hasAlt ) {
5362 if ( $label !== '' ) {
5363 $alt = $this->stripAltText( $label, false );
5364 } else {
5365 if ( $enableLegacyMediaDOM ) {
5366 $alt = $title->getText();
5367 }
5368 }
5369 }
5370 $imageOptions['title'] = $this->stripAltText( $label, false );
5371
5372 // Match makeImage which sets this unconditionally
5373 $handlerOptions['targetlang'] = $this->getTargetLanguage()->getCode();
5374
5375 $ig->add(
5376 $title, $label, $alt, '', $handlerOptions,
5377 ImageGalleryBase::LOADING_DEFAULT, $imageOptions
5378 );
5379 }
5380 $html = $ig->toHTML();
5381 $this->hookRunner->onAfterParserFetchFileAndTitle( $this, $ig, $html );
5382 return $html;
5383 }
5384
/**
 * Get the image parameter map and the matching magic word array for a
 * media handler. Both are built on first use and cached per handler class
 * (under the empty string when there is no handler).
 *
 * @param MediaHandler|false $handler Handler of the file, or a falsy value
 *   when there is none
 * @return array Two elements: the map of magic word ID => [ type, parameter
 *   name ], and the magic word array created from the map's keys
 */
private function getImageParams( $handler ) {
	$cacheKey = $handler ? get_class( $handler ) : '';

	if ( isset( $this->mImageParams[$cacheKey] ) ) {
		return [ $this->mImageParams[$cacheKey], $this->mImageParamsMagicArray[$cacheKey] ];
	}

	# Static lists, set up once
	static $internalParamNames = [
		'horizAlign' => [ 'left', 'right', 'center', 'none' ],
		'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
			'bottom', 'text-bottom' ],
		'frame' => [ 'thumbnail', 'framed', 'frameless', 'border',
			// These parameters take arguments, so to ensure literals
			// have precedence, keep them listed last (T372935):
			'manualthumb', 'upright', 'link', 'alt', 'class' ],
	];
	static $internalParamMap;
	if ( !$internalParamMap ) {
		$internalParamMap = [];
		foreach ( $internalParamNames as $type => $names ) {
			foreach ( $names as $name ) {
				// For grep: img_left, img_right, img_center, img_none,
				// img_baseline, img_sub, img_super, img_top, img_text_top, img_middle,
				// img_bottom, img_text_bottom,
				// img_thumbnail, img_manualthumb, img_framed, img_frameless, img_upright,
				// img_border, img_link, img_alt, img_class
				$magicName = 'img_' . str_replace( '-', '_', $name );
				$internalParamMap[$magicName] = [ $type, $name ];
			}
		}
	}

	# Handler parameters come next. img_width is one of them, so it
	# matters that they are listed *after* the literal names (T372935).
	$paramMap = $internalParamMap;
	if ( !$handler ) {
		// Parse the size for non-existent files. See T273013
		$paramMap[ 'img_width' ] = [ 'handler', 'width' ];
	} else {
		foreach ( $handler->getParamMap() as $magic => $paramName ) {
			$paramMap[$magic] = [ 'handler', $paramName ];
		}
	}

	$this->mImageParams[$cacheKey] = $paramMap;
	$this->mImageParamsMagicArray[$cacheKey] =
		$this->magicWordFactory->newArray( array_keys( $paramMap ) );

	return [ $this->mImageParams[$cacheKey], $this->mImageParamsMagicArray[$cacheKey] ];
}
5441
/**
 * Parse image options and build the output for an image link.
 *
 * $options is split on '|' (LanguageConverter -{ }- markup is kept
 * together). A part that matches a known image parameter and validates is
 * stored in $params under its type ('frame', 'handler', 'horizAlign' or
 * 'vertAlign'); the last part that does not validate becomes the caption.
 * The BeforeParserFetchFileAndTitle and ParserMakeImageParams hooks are run
 * along the way, and a missing file adds the 'broken-file-category'
 * tracking category.
 *
 * @param LinkTarget $link Target of the image; replaced by the target that
 *   fetchFileAndTitle() returns
 * @param string $options Pipe-separated option text
 * @param mixed $holders Passed to stripAltText(); assumed to be a link
 *   holder collection or false (TODO confirm type)
 * @return string Result of Linker::makeImageLink() (presumably HTML), after
 *   modifyImageHtml() when a file was found
 */
5451 public function makeImage( LinkTarget $link, $options, $holders = false ) {
5452 # Check if the options text is of the form "options|alt text"
5453 # Options are:
5454 # * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang
5455 # * left no resizing, just left align. label is used for alt= only
5456 # * right same, but right aligned
5457 # * none same, but not aligned
5458 # * ___px scale to ___ pixels width, no aligning. e.g. use in taxobox
5459 # * center center the image
5460 # * framed Keep original image size, no magnify-button.
5461 # * frameless like 'thumb' but without a frame. Keeps user preferences for width
5462 # * upright reduce width for upright images, rounded to full __0 px
5463 # * border draw a 1px border around the image
5464 # * alt Text for HTML alt attribute (defaults to empty)
5465 # * class Set a class for img node
5466 # * link Set the target of the image link. Can be external, interwiki, or local
5467 # vertical-align values (no % or length right now):
5468 # * baseline
5469 # * sub
5470 # * super
5471 # * top
5472 # * text-top
5473 # * middle
5474 # * bottom
5475 # * text-bottom
5476
5477 # Protect LanguageConverter markup when splitting into parts
5478 $parts = StringUtils::delimiterExplode(
5479 '-{', '}-', '|', $options, true /* allow nesting */
5480 );
5481
5482 # Give extensions a chance to select the file revision for us
// From here on $options is reused as the array handed to the hook and to
// fetchFileAndTitle(); the option text now lives in $parts.
5483 $options = [];
5484 $descQuery = false;
5485 $title = Title::castFromLinkTarget( $link ); // hook signature compat
5486 $this->hookRunner->onBeforeParserFetchFileAndTitle(
5487 // @phan-suppress-next-line PhanTypeMismatchArgument Type mismatch on pass-by-ref args
5488 $this, $title, $options, $descQuery
5489 );
5490 # Fetch and register the file (file title may be different via hooks)
5491 [ $file, $link ] = $this->fetchFileAndTitle( $link, $options );
5492
5493 # Get parameter map
5494 $handler = $file ? $file->getHandler() : false;
5495
5496 [ $paramMap, $mwArray ] = $this->getImageParams( $handler );
5497
5498 if ( !$file ) {
5499 $this->addTrackingCategory( 'broken-file-category' );
5500 }
5501
5502 # Process the input parameters
5503 $caption = '';
5504 $params = [ 'frame' => [], 'handler' => [],
5505 'horizAlign' => [], 'vertAlign' => [] ];
5506 $seenformat = false;
5507 foreach ( $parts as $part ) {
5508 [ $magicName, $value ] = $mwArray->matchVariableStartToEnd( trim( $part ) );
5509 $validated = false;
5510 if ( isset( $paramMap[$magicName] ) ) {
5511 [ $type, $paramName ] = $paramMap[$magicName];
5512
5513 # Special case; width and height come in one variable together
5514 if ( $type === 'handler' && $paramName === 'width' ) {
5515 // The 'px' suffix has already been localized by img_width
5516 $parsedWidthParam = $this->parseWidthParam( $value, true, true );
5517 // Parsoid applies data-(width|height) attributes to broken
5518 // media spans, for client use. See T273013
5519 $validateFunc = static function ( $name, $value ) use ( $handler ) {
5520 return $handler
5521 ? $handler->validateParam( $name, $value )
5522 : $value > 0;
5523 };
5524 if ( isset( $parsedWidthParam['width'] ) ) {
5525 $width = $parsedWidthParam['width'];
5526 if ( $validateFunc( 'width', $width ) ) {
5527 $params[$type]['width'] = $width;
5528 $validated = true;
5529 }
5530 }
5531 if ( isset( $parsedWidthParam['height'] ) ) {
5532 $height = $parsedWidthParam['height'];
5533 if ( $validateFunc( 'height', $height ) ) {
5534 $params[$type]['height'] = $height;
5535 $validated = true;
5536 }
5537 }
5538 # else no validation -- T15436
5539 } else {
5540 if ( $type === 'handler' ) {
5541 # Validate handler parameter
// Without a handler, getImageParams() only adds the 'width' handler
// entry, which is dealt with in the branch above.
5542 $validated = $handler->validateParam( $paramName, $value );
5543 } else {
5544 # Validate internal parameters
5545 switch ( $paramName ) {
5546 case 'alt':
5547 case 'class':
5548 $validated = true;
5549 $value = $this->stripAltText( $value, $holders );
5550 break;
5551 case 'link':
// parseLinkParameter() returns [ type, target ]; the type replaces
// $paramName and so becomes the key used in $params['frame'].
5552 [ $paramName, $value ] =
5553 $this->parseLinkParameter(
5554 $this->stripAltText( $value, $holders )
5555 );
5556 if ( $paramName ) {
5557 $validated = true;
5558 if ( $paramName === 'no-link' ) {
5559 $value = true;
5560 }
5561 }
5562 break;
5563 case 'manualthumb':
5564 # @todo FIXME: Possibly check validity here for
5565 # manualthumb? downstream behavior seems odd with
5566 # missing manual thumbs.
5567 $value = $this->stripAltText( $value, $holders );
5568 // fall through
5569 case 'frameless':
5570 case 'framed':
5571 case 'thumbnail':
5572 // use first appearing option, discard others.
5573 $validated = !$seenformat;
5574 $seenformat = true;
5575 break;
5576 default:
5577 # Most other things appear to be empty or numeric...
5578 $validated = ( $value === false || is_numeric( trim( $value ) ) );
5579 }
5580 }
5581
5582 if ( $validated ) {
5583 $params[$type][$paramName] = $value;
5584 }
5585 }
5586 }
// Anything that is not a valid parameter is caption text; a later
// unvalidated part overwrites an earlier one.
5587 if ( !$validated ) {
5588 $caption = $part;
5589 }
5590 }
5591
5592 # Process alignment parameters
5593 if ( $params['horizAlign'] !== [] ) {
5594 $params['frame']['align'] = array_key_first( $params['horizAlign'] );
5595 }
5596 if ( $params['vertAlign'] !== [] ) {
5597 $params['frame']['valign'] = array_key_first( $params['vertAlign'] );
5598 }
5599
5600 $params['frame']['caption'] = $caption;
5601
5602 $enableLegacyMediaDOM = $this->svcOptions->get( MainConfigNames::ParserEnableLegacyMediaDOM );
5603
5604 # Will the image be presented in a frame, with the caption below?
5605 // @phan-suppress-next-line PhanImpossibleCondition
5606 $hasVisibleCaption = isset( $params['frame']['framed'] )
5607 // @phan-suppress-next-line PhanImpossibleCondition
5608 || isset( $params['frame']['thumbnail'] )
5609 // @phan-suppress-next-line PhanImpossibleCondition
5610 || isset( $params['frame']['manualthumb'] );
5611
5612 # In the old days, [[Image:Foo|text...]] would set alt text. Later it
5613 # came to also set the caption, ordinary text after the image -- which
5614 # makes no sense, because that just repeats the text multiple times in
5615 # screen readers. It *also* came to set the title attribute.
5616 # Now that we have an alt attribute, we should not set the alt text to
5617 # equal the caption: that's worse than useless, it just repeats the
5618 # text. This is the framed/thumbnail case. If there's no caption, we
5619 # use the unnamed parameter for alt text as well, just for the time be-
5620 # ing, if the unnamed param is set and the alt param is not.
5621 # For the future, we need to figure out if we want to tweak this more,
5622 # e.g., introducing a title= parameter for the title; ignoring the un-
5623 # named parameter entirely for images without a caption; adding an ex-
5624 # plicit caption= parameter and preserving the old magic unnamed para-
5625 # meter for BC; ...
5626 if ( $hasVisibleCaption ) {
5627 if (
5628 // @phan-suppress-next-line PhanImpossibleCondition
5629 $caption === '' && !isset( $params['frame']['alt'] ) &&
5630 $enableLegacyMediaDOM
5631 ) {
5632 # No caption or alt text, add the filename as the alt text so
5633 # that screen readers at least get some description of the image
5634 $params['frame']['alt'] = $link->getText();
5635 }
5636 # Do not set $params['frame']['title'] because tooltips are unnecessary
5637 # for framed images, the caption is visible
5638 } else {
5639 // @phan-suppress-next-line PhanImpossibleCondition
5640 if ( !isset( $params['frame']['alt'] ) ) {
5641 # No alt text, use the "caption" for the alt text
5642 if ( $caption !== '' ) {
5643 $params['frame']['alt'] = $this->stripAltText( $caption, $holders );
5644 } elseif ( $enableLegacyMediaDOM ) {
5645 # No caption, fall back to using the filename for the
5646 # alt text
5647 $params['frame']['alt'] = $link->getText();
5648 }
5649 }
5650 # Use the "caption" for the tooltip text
5651 $params['frame']['title'] = $this->stripAltText( $caption, $holders );
5652 }
5653 $params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();
5654
5655 // hook signature compat again, $link may have changed
5656 $title = Title::castFromLinkTarget( $link );
5657 $this->hookRunner->onParserMakeImageParams( $title, $file, $params, $this );
5658
5659 # Linker does the rest
5660 $time = $options['time'] ?? false;
5661 // @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset
5662 $ret = Linker::makeImageLink( $this, $link, $file, $params['frame'], $params['handler'],
5663 $time, $descQuery, $this->mOptions->getThumbSize() );
5664
5665 # Give the handler a chance to modify the parser object
5666 if ( $handler ) {
5667 $handler->parserTransformHook( $this, $file );
5668 }
5669 if ( $file ) {
5670 $this->modifyImageHtml( $file, $params, $ret );
5671 }
5672
5673 return $ret;
5674 }
5675
/**
 * Interpret the value of an image "link" option.
 *
 * Possible results, as [ type, target ]:
 *  - [ 'no-link', false ] for an empty value;
 *  - [ 'link-url', string ] for a value that starts with a valid protocol
 *    and matches the external link pattern (the URL is also recorded as
 *    an external link in the parser output);
 *  - [ 'link-title', Title ] for a value that parses as a title (recorded
 *    as a link in the parser output);
 *  - [ null, false ] for anything else.
 *
 * @param string $value Value of the link option
 * @return array Two elements: type (string|null) and target (Title|string|false)
 */
private function parseLinkParameter( $value ) {
	$chars = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	$prots = $this->urlUtils->validProtocols();

	if ( $value === '' ) {
		return [ 'no-link', false ];
	}

	if ( preg_match( "/^((?i)$prots)/", $value ) ) {
		// Looks like a URL; it is only accepted if all of it is valid.
		if ( !preg_match( "/^((?i)$prots)$addr$chars*$/u", $value ) ) {
			return [ null, false ];
		}
		$this->mOutput->addExternalLink( $value );
		return [ 'link-url', $value ];
	}

	// Link arguments are percent-decoded for consistency with wikilink
	// handling (T216003#7836261).
	//
	// One thing to be aware of: the |link= option accepts both
	// link=Test%22test and link=[[Test%22test]], and both end up being
	// decoded here.
	//
	// For the plain form, decoding here is straightforward and desirable.
	//
	// For the bracketed form, double decoding might seem possible:
	// wikilink syntax has higher precedence and was already parsed as a
	// link, and $value has been through stripAltText(), which calls
	// replaceLinkHoldersText() on the link. The text arriving here has
	// therefore been percent-decoded once already.
	//
	// The troublesome input would be %25 in the title, as it decodes to
	// %, which could combine with the characters after it. But % is not
	// a valid title character, so such text would not have parsed as a
	// link and would still contain the encoded %25 at this point.
	//
	// So double decoding is not an issue. See the test,
	// "Should not double decode the link option"
	if ( strpos( $value, '%' ) !== false ) {
		$value = rawurldecode( $value );
	}

	$linkTitle = Title::newFromText( $value );
	if ( !$linkTitle ) {
		return [ null, false ];
	}

	$this->mOutput->addLink( $linkTitle );
	return [ 'link-title', $linkTitle ];
}
5746
/**
 * Run the ParserModifyImageHTML hook for an image.
 *
 * @param File $file File the HTML was generated for
 * @param array $params Image parameters, handed to the hook as is
 * @param string &$html Image HTML; passed to the hook by reference
 */
public function modifyImageHtml( File $file, array $params, string &$html ) {
	$runner = $this->hookRunner;
	$runner->onParserModifyImageHTML( $this, $file, $params, $html );
}
5757
/**
 * Turn caption wikitext into plain text for use in an attribute such as
 * alt or title.
 *
 * Link placeholders and strip markers are expanded, "&" is encoded where
 * it starts one of the legacy named entities that HTML decodes without a
 * terminating semicolon, and all tags are stripped.
 *
 * @param string $caption Text to clean up
 * @param mixed $holders Object whose replaceText() is used for link
 *   placeholders; when falsy, the parser's own link holders are used
 *   instead (presumably a link holder collection; TODO confirm type)
 * @return string
 */
private function stripAltText( $caption, $holders ) {
	# Remove bad stuff from the title (tooltip). replaceLinkHoldersText()
	# alone is not enough here: when the call comes from
	# handleInternalLinks2(), mLinkHolders is not up-to-date.
	$tooltip = $holders
		? $holders->replaceText( $caption )
		: $this->replaceLinkHoldersText( $caption );

	# Thumbnail attributes must not keep placeholders that would later
	# be expanded to HTML, so expand them now; the tags are removed below.
	$tooltip = $this->mStripState->unstripBoth( $tooltip );

	# Compatibility hack! In HTML certain entity references not terminated
	# by a semicolon are decoded (but not if we're in an attribute; that's
	# how link URLs get away without properly escaping & in queries).
	# But wikitext has always required semicolon-termination of entities,
	# so encode & where needed to avoid decode of semicolon-less entities.
	# See T209236 and
	# https://www.w3.org/TR/html5/syntax.html#named-character-references
	# T210437 discusses moving this workaround to Sanitizer::stripAllTags.
	$unterminatedEntityAmp = "/
		&                    # 1. entity prefix
		(?=                  # 2. followed by:
		(?:                  #  a. one of the legacy semicolon-less named entities
			A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
			C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
			GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
			O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
			U(?:acute|circ|grave|uml)|Yacute|
			a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
			c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
			divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
			frac(?:1(?:2|4)|34)|
			gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
			i(?:acute|circ|excl|grave|quest|uml)|laquo|
			lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
			m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
			not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
			o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
			p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
			s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
			u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
		)
		(?:[^;]|$))          #  b. and not followed by a semicolon
		# S = study, for efficiency
		/Sx";
	$tooltip = preg_replace( $unterminatedEntityAmp, '&amp;', $tooltip );

	return Sanitizer::stripAllTags( $tooltip );
}
5815
/**
 * Expand variables and strip markers in attribute text.
 *
 * Emits a deprecation notice (deprecated since 1.35) on every call.
 *
 * @param string &$text Text to process; also updated by reference
 * @param mixed $frame Passed on to replaceVariables()
 * @return string The processed text
 */
public function attributeStripCallback( &$text, $frame = false ) {
	wfDeprecated( __METHOD__, '1.35' );

	// Both steps assign to the by-reference parameter, so the caller's
	// variable always holds the latest intermediate result.
	$text = $this->replaceVariables( $text, $frame );

	return $text = $this->mStripState->unstripBoth( $text );
}
5831
/**
 * List the names of all tags that have a hook registered.
 *
 * @return array Keys of the tag hook registry
 */
public function getTags(): array {
	$tagNames = array_keys( $this->mTagHooks );

	return $tagNames;
}
5841
/**
 * Get the parser function synonym table that setFunctionHook() fills.
 *
 * @return array Indexed first by case sensitivity (0 or 1), then by
 *   synonym; values are magic word IDs
 */
public function getFunctionSynonyms() {
	$table = $this->mFunctionSynonyms;

	return $table;
}
5849
/**
 * Get the valid URL protocols as reported by the URL utilities service.
 *
 * @return string Result of validProtocols(); within this class it is
 *   interpolated into regular expressions
 */
public function getUrlProtocols() {
	$urlUtils = $this->urlUtils;

	return $urlUtils->validProtocols();
}
5857
/**
 * Get the text of one section, or replace it within the full text.
 *
 * The parser is locked and a new OT_PLAIN parse is started. The section ID
 * is split on '-': the last part is cast to an integer section index, and
 * a 'T' among the remaining parts preprocesses the text for inclusion.
 * A section ends at the next heading of the same or a higher level, so
 * nested subsections belong to it.
 *
 * @param string $text Full wikitext
 * @param string|int $sectionId Section identifier, e.g. "2" or "T-2"
 * @param string $mode 'get' or 'replace'
 * @param string $newText Replacement text in 'replace' mode; in 'get' mode
 *   the value returned when the section does not exist
 * @param ?PageReference $page Page passed to startParse()
 * @return string In 'get' mode the section text; in 'replace' mode the
 *   full text with the section replaced, or the unchanged input when the
 *   section does not exist. Trailing whitespace is trimmed from results
 *   that were assembled here.
 */
private function extractSections( $text, $sectionId, $mode, $newText, ?PageReference $page = null ) {
	// Held in a local so the lock lasts until this method returns.
	$scopedLock = $this->lock();
	$this->startParse(
		$page,
		ParserOptions::newFromUser( RequestContext::getMain()->getUser() ),
		self::OT_PLAIN,
		true
	);
	$frame = $this->getPreprocessor()->newFrame();
	$isGet = ( $mode === 'get' );
	$isReplace = ( $mode === 'replace' );

	# Decode the section ID.
	// It is either a magic string such as 'new' (which counts as 0) or a
	// numbered ID of the form "T-<section index>", hence the explicit
	// cast of the last part to a number. (T323373)
	$idParts = explode( '-', $sectionId );
	$sectionIndex = (int)array_pop( $idParts );
	$flags = in_array( 'T', $idParts, true ) ? Preprocessor::DOM_FOR_INCLUSION : 0;

	# An empty document only has sections 0 and T-0
	if ( strval( $text ) === '' ) {
		if ( $isGet ) {
			return $sectionIndex === 0 ? '' : $newText;
		}
		return $sectionIndex === 0 ? $newText : $text;
	}

	# Preprocess. <h> nodes mark section breaks and only occur at the
	# top level, so walking the children of the root finds all of them.
	$node = $this->preprocessToDom( $text, $flags )->getFirstChild();
	$outText = '';

	# Locate the start of the wanted section
	if ( $sectionIndex === 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		for ( ; $node; $node = $node->getNextSibling() ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					$targetLevel = $bits['level'];
					break;
				}
			}
			// When replacing, everything before the section is kept as is.
			if ( $isReplace ) {
				$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
		}
	}

	if ( !$node ) {
		# No such section
		return $isGet ? $newText : $text;
	}

	# Walk to the end of the section, nested sections included.
	# $node is known to be set here, so the first pass always runs.
	for ( ; $node; $node = $node->getNextSibling() ) {
		if ( $node->getName() === 'h' ) {
			$bits = $node->splitHeading();
			// @phan-suppress-next-line PhanPossiblyUndeclaredVariable False positive
			if ( $bits['i'] != $sectionIndex && $bits['level'] <= $targetLevel ) {
				break;
			}
		}
		if ( $isGet ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	if ( $isReplace ) {
		# Insert the replacement, followed by two newlines: editors
		# conventionally strip trailing whitespace from $newText, so both
		# are needed to restore the paragraph gap. Nothing is added when
		# there is no replacement text.
		if ( $newText != "" ) {
			$outText .= $newText . "\n\n";
		}

		# Then copy whatever follows the section
		for ( ; $node; $node = $node->getNextSibling() ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	# Put stripped tags back
	return rtrim( $this->mStripState->unstripBoth( $outText ) );
}
6003
/**
 * Get the wikitext of a single section.
 *
 * Thin wrapper around extractSections() in 'get' mode.
 *
 * @param string $text Full wikitext
 * @param string|int $sectionId Section identifier, e.g. "2" or "T-2"
 * @param string $defaultText Returned when the section does not exist
 * @return string
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	$mode = 'get';

	return $this->extractSections( $text, $sectionId, $mode, $defaultText );
}
6022
/**
 * Replace a single section within the given wikitext.
 *
 * Thin wrapper around extractSections() in 'replace' mode.
 *
 * @param string $oldText Full wikitext
 * @param string|int $sectionId Section identifier, e.g. "2" or "T-2"
 * @param string $newText Text to put in place of the section
 * @return string
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$mode = 'replace';

	return $this->extractSections( $oldText, $sectionId, $mode, $newText );
}
6039
/**
 * Split wikitext into a flat list of sections.
 *
 * The parser is locked and a new OT_PLAIN parse is started. The first
 * entry is always the text before the first heading (index 0, level 0,
 * empty heading), even when that text is empty.
 *
 * @param string $text Wikitext to split
 * @return array[] One array per section with the keys:
 *   - index: section index taken from the heading node (0 for the lead)
 *   - level: heading level (0 for the lead)
 *   - offset: byte offset of the section start within the recovered text
 *   - heading: original text of the heading node ('' for the lead)
 *   - text: original text of the section, heading included
 */
public function getFlatSectionInfo( $text ) {
	// Held in a local so the lock lasts until this method returns.
	$scopedLock = $this->lock();
	$this->startParse(
		null,
		ParserOptions::newFromUser( RequestContext::getMain()->getUser() ),
		self::OT_PLAIN,
		true
	);
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $text, 0 );

	$sections = [];
	$current = [
		'index' => 0,
		'level' => 0,
		'offset' => 0,
		'heading' => '',
		'text' => ''
	];
	$offset = 0;

	for ( $node = $root->getFirstChild(); $node; $node = $node->getNextSibling() ) {
		$nodeText = $frame->expand( $node, PPFrame::RECOVER_ORIG );

		if ( $node->getName() !== 'h' ) {
			$current['text'] .= $nodeText;
		} else {
			// A heading closes the running section and opens a new one.
			$bits = $node->splitHeading();
			$sections[] = $current;
			$current = [
				'index' => $bits['i'],
				'level' => $bits['level'],
				'offset' => $offset,
				'heading' => $nodeText,
				'text' => $nodeText
			];
		}

		$offset += strlen( $nodeText );
	}

	$sections[] = $current;

	return $sections;
}
6111
/**
 * Get the ID of the revision we are parsing.
 *
 * @return int|null The stored mRevisionId; it may be null (see
 *   getRevisionRecordObject(), which treats null as preview mode)
 */
public function getRevisionId() {
	$revisionId = $this->mRevisionId;

	return $revisionId;
}
6126
/**
 * Get the revision record object for $this->mRevisionId.
 *
 * The result is memoized in $this->mRevisionRecordObject once a record has
 * been resolved.
 *
 * @return RevisionRecord|null Null when the revision is missing, or when we
 *   are in preview mode and only an already-saved revision was offered
 */
public function getRevisionRecordObject() {
	$cached = $this->mRevisionRecordObject;
	if ( $cached ) {
		return $cached;
	}

	// NOTE: the callback is consulted even if mRevisionId is null. This is
	// useful when parsing a revision that has not yet been saved. A saved
	// revision returned in preview mode is discarded further down.
	// NOTE: the callback may be used to inject an OLD revision that was
	// already loaded, so "current" is a bit of a misnomer; it cannot simply
	// be skipped when mRevisionId is set.
	$fetchRevision = $this->mOptions->getCurrentRevisionRecordCallback();
	$rev = $fetchRevision( $this->getTitle(), $this );

	if ( !$rev ) {
		// The callback signals a missing revision with `false` (not null);
		// see e.g. Parser::statelessFetchRevisionRecord(), the default
		// callback. This API expects `null` instead. (T251952)
		return null;
	}

	$wantedId = $this->mRevisionId;
	if ( $wantedId === null ) {
		// Preview mode. If the callback returned an existing revision, it is
		// probably the page's current revision, which is not what we want.
		// The callback is still called so that an unsaved revision can be
		// injected, e.g. for self-transclusion previews.
		if ( $rev->getId() ) {
			return null;
		}
	} elseif ( $wantedId && $rev->getId() != $wantedId ) {
		// For a new revision the callback should already have been set up to
		// return the matching object. If it did not, fetch by ID instead.
		$rev = MediaWikiServices::getInstance()
			->getRevisionLookup()
			->getRevisionById( $wantedId );
	}

	$this->mRevisionRecordObject = $rev;

	return $this->mRevisionRecordObject;
}
6180
/**
 * Get the timestamp associated with the current revision, adjusted with the
 * site-default timezone offset.
 *
 * The value is computed once and memoized in $this->mRevisionTimestamp. The
 * unadjusted timestamp is recorded on the ParserOutput via
 * setRevisionTimestampUsed().
 *
 * @return string Adjusted timestamp, as returned by Language::userAdjust()
 */
public function getRevisionTimestamp() {
	if ( $this->mRevisionTimestamp === null ) {
		# Prefer the revision's own timestamp; otherwise use the one from the parser options
		$rev = $this->getRevisionRecordObject();
		if ( $rev && $rev->getTimestamp() ) {
			$timestamp = $rev->getTimestamp();
		} else {
			$timestamp = $this->mOptions->getTimestamp();
		}
		$this->mOutput->setRevisionTimestampUsed( $timestamp ); // unadjusted time zone

		# The cryptic '' timezone parameter selects the site-default timezone
		# offset rather than the user's setting. The value ends up in the
		# parser cache, is served to other users and may even be used inside
		# links, so it has to be the same for every visitor.
		$this->mRevisionTimestamp = $this->contLang->userAdjust( $timestamp, '' );
	}

	return $this->mRevisionTimestamp;
}
6208
/**
 * Get the name of the user that edited the last revision.
 *
 * The name is memoized in $this->mRevisionUser once it has been determined.
 *
 * @return string|null User name, or null when there is neither a revision
 *   user nor a pre-save-transform/preview context to fall back on
 */
public function getRevisionUser(): ?string {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$rev = $this->getRevisionRecordObject();

	if ( $rev && $rev->getUser() ) {
		$this->mRevisionUser = $rev->getUser()->getName();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		# If this template is subst: the revision id will be blank,
		# so just use the current user's name
		$this->mRevisionUser = $this->getUserIdentity()->getName();
	}
	# Otherwise $this->mRevisionUser deliberately stays null

	return $this->mRevisionUser;
}
6232
/**
 * Get the size of the revision.
 *
 * The value is memoized in $this->mRevisionSize.
 *
 * @return int|null Size reported by the revision record, or the parser input
 *   size ($this->mInputSize) when no revision record is available
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize !== null ) {
		return $this->mRevisionSize;
	}

	$rev = $this->getRevisionRecordObject();

	# If this variable is subst: the revision id will be blank, so use the
	# parser input size instead, because the substitution itself will change
	# the size.
	$this->mRevisionSize = $rev ? $rev->getSize() : $this->mInputSize;

	return $this->mRevisionSize;
}
6254
/**
 * Accessor for the 'defaultsort' page property.
 *
 * @deprecated since 1.38; emits a deprecation warning on every call
 * @return string The 'defaultsort' page property of the current output, or
 *   '' when it is not set
 */
public function getDefaultSort() {
	wfDeprecated( __METHOD__, '1.38' );

	$sortKey = $this->mOutput->getPageProperty( 'defaultsort' );

	return $sortKey ?? '';
}
6272
/**
 * Turn already-stripped heading text into a normalized section name.
 *
 * Applies, in this order: section-name whitespace normalization, character
 * reference decoding, and normalizeSectionName().
 *
 * @param string $text Heading text with wikitext markup already removed
 * @return string Normalized section name
 */
private static function getSectionNameFromStrippedText( $text ) {
	return self::normalizeSectionName(
		Sanitizer::decodeCharReferences(
			Sanitizer::normalizeSectionNameWhitespace( $text )
		)
	);
}
6279
/**
 * Build a link fragment ("#...") for a section name.
 *
 * @param string $sectionName Normalized section name
 * @return string '#' followed by the name escaped with Sanitizer::escapeIdForLink()
 */
private static function makeAnchor( $sectionName ) {
	$escapedId = Sanitizer::escapeIdForLink( $sectionName );

	return '#' . $escapedId;
}
6283
/**
 * Build a link fragment ("#...") for a section name, using the legacy ID
 * encoding when it is configured as the fallback fragment mode.
 *
 * @param string $sectionName Normalized section name
 * @return string '#' followed by the escaped ID
 */
private function makeLegacyAnchor( $sectionName ) {
	$fragmentMode = $this->svcOptions->get( MainConfigNames::FragmentMode );

	// Index 1 of the FragmentMode setting is the one checked for 'legacy'
	$useLegacy = ( $fragmentMode[1] ?? null ) === 'legacy';

	// ForAttribute() and ForLink() are the same for legacy encoding
	$id = $useLegacy
		? Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK )
		: Sanitizer::escapeIdForLink( $sectionName );

	return '#' . $id;
}
6295
/**
 * Try to guess the section anchor name based on a wikitext fragment,
 * presumably extracted from a heading.
 *
 * @param string $text Wikitext of the heading
 * @return string Anchor including the leading '#'
 */
public function guessSectionNameFromWikiText( $text ) {
	# Wikitext links would break the anchor, so strip the markup first
	$stripped = $this->stripSectionName( $text );

	return self::makeAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6311
/**
 * Same as guessSectionNameFromWikiText(), but produces legacy anchors
 * instead, if possible.
 *
 * @param string $text Wikitext of the heading
 * @return string Anchor including the leading '#'
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Wikitext links would break the anchor, so strip the markup first
	$stripped = $this->stripSectionName( $text );

	return $this->makeLegacyAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6328
/**
 * Like guessSectionNameFromWikiText(), but takes already-stripped text as
 * input.
 *
 * @param string $text Heading text with wikitext markup already removed
 * @return string Anchor including the leading '#'
 */
public static function guessSectionNameFromStrippedText( $text ) {
	return self::makeAnchor( self::getSectionNameFromStrippedText( $text ) );
}
6339
/**
 * Apply the title parser's fragment normalization to a section name.
 *
 * The name is parsed as the fragment of a title string ("#<name>") and the
 * resulting 'fragment' part is returned.
 *
 * @param string $text Section name
 * @return string Normalized name, or $text unchanged when the title parser
 *   rejects the string with a MalformedTitleException
 */
private static function normalizeSectionName( $text ) {
	# T90902: ensure the same normalization is applied for IDs as to links
	$titleParser = MediaWikiServices::getInstance()->getTitleParser();
	'@phan-var MediaWikiTitleCodec $titleParser';

	try {
		$split = $titleParser->splitTitleString( '#' . $text );
	} catch ( MalformedTitleException $e ) {
		// Not parseable as a title fragment; hand the input back untouched
		return $text;
	}

	return $split['fragment'];
}
6359
/**
 * Strips a text string of wikitext for use in a section anchor.
 *
 * Removes internal and external link markup (keeping the link text), runs
 * the wikitext quote handling (italics & bold) and finally removes anything
 * between '<' and '>'.
 *
 * @param string $text Wikitext of the heading
 * @return string Stripped text
 */
public function stripSectionName( $text ) {
	# Internal links: [[target|label]] becomes label, [[target]] becomes target
	$pipedLink = '/\[\[:?([^[|]+)\|([^[]+)\]\]/';
	$plainLink = '/\[\[:?([^[]+)\|?\]\]/';
	$text = preg_replace( $pipedLink, '$2', $text );
	$text = preg_replace( $plainLink, '$1', $text );

	# External links: [url label] becomes label
	# @todo FIXME: Not tolerant to blank link text
	# I.E. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$externalLink = '/\[(?i:' . $this->urlUtils->validProtocols() . ')([^ ]+?) ([^[]+)\]/';
	$text = preg_replace( $externalLink, '$2', $text );

	# Parse wikitext quotes (italics & bold), then drop the HTML tags
	$quoted = $this->doQuotes( $text );

	return StringUtils::delimiterReplace( '<', '>', '', $quoted );
}
6394
/**
 * Call a callback function on all regions of the given text that are not
 * inside strip markers, and replace those regions with the return value of
 * the callback.
 *
 * Strip markers (MARKER_PREFIX ... MARKER_SUFFIX) are copied through
 * verbatim. A marker prefix with no matching suffix causes the rest of the
 * string to be copied through verbatim as well.
 *
 * @param string $s Text to process
 * @param callable $callback Receives one non-marker region (possibly the
 *   empty string) and returns its replacement
 * @return string Processed text
 */
public function markerSkipCallback( $s, callable $callback ) {
	$suffixLength = strlen( self::MARKER_SUFFIX );
	$length = strlen( $s );
	$result = '';
	$pos = 0;

	while ( $pos < $length ) {
		$start = strpos( $s, self::MARKER_PREFIX, $pos );
		if ( $start === false ) {
			// No further markers: everything left is ordinary text
			$result .= $callback( substr( $s, $pos ) );
			break;
		}

		// Ordinary text in front of the marker
		$result .= $callback( substr( $s, $pos, $start - $pos ) );

		$end = strpos( $s, self::MARKER_SUFFIX, $start );
		if ( $end === false ) {
			// Unterminated marker: keep the remainder untouched
			$result .= substr( $s, $start );
			break;
		}

		// Copy the complete marker and continue right after it
		$end += $suffixLength;
		$result .= substr( $s, $start, $end - $start );
		$pos = $end;
	}

	return $result;
}
6436
/**
 * Remove any strip markers found in the given text.
 *
 * Delegates to the killMarkers() method of the current strip state.
 *
 * @param string $text
 * @return string
 */
public function killMarkers( $text ) {
	$stripState = $this->mStripState;

	return $stripState->killMarkers( $text );
}
6447
/**
 * Parse a width parameter of an image link, such as 300px or 200x300px.
 *
 * @param string $value Parameter value
 * @param bool $parseHeight Whether a "WxH" value is accepted and split into
 *   width and height
 * @param bool $localized When false, $value is first matched against the
 *   'img_width' magic word and, on a match, replaced by the extracted value
 * @return int[] Empty array when $value is '' or matches neither form;
 *   otherwise contains 'width' and, for the "WxH" form, 'height'. Missing
 *   digits yield 0.
 */
public function parseWidthParam( $value, $parseHeight = true, bool $localized = false ) {
	$parsed = [];
	if ( $value === '' ) {
		return $parsed;
	}

	if ( !$localized ) {
		// Strip a localized 'px' suffix (T374311)
		$widthWords = $this->magicWordFactory->newArray( [ 'img_width' ] );
		[ $matchedWord, $strippedValue ] = $widthWords->matchVariableStartToEnd( $value );
		if ( $matchedWord ) {
			$value = $strippedValue;
		}
	}

	# (T15500) In both cases (width/height and width only),
	# permit trailing "px" for backward compatibility.
	$m = [];
	$hasTrailingPx = false;
	if ( $parseHeight && preg_match( '/^([0-9]*)x([0-9]*)\s*(px)?\s*$/', $value, $m ) ) {
		$parsed['width'] = intval( $m[1] );
		$parsed['height'] = intval( $m[2] );
		$hasTrailingPx = (bool)( $m[3] ?? false );
	} elseif ( preg_match( '/^([0-9]*)\s*(px)?\s*$/', $value, $m ) ) {
		$parsed['width'] = intval( $m[1] );
		$hasTrailingPx = (bool)( $m[2] ?? false );
	}

	// A literal "px" still present at this point is tracked
	if ( $hasTrailingPx ) {
		$this->addTrackingCategory( 'double-px-category' );
	}

	return $parsed;
}
6493
/**
 * Lock the current instance of the parser.
 *
 * While locked, $this->mInParse holds the backtrace of the caller that took
 * the lock.
 *
 * @return ScopedCallback Object whose callback resets $this->mInParse to false
 * @throws LogicException If the parser is already locked
 */
protected function lock() {
	if ( $this->mInParse ) {
		throw new LogicException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively? Lock is held by: " . $this->mInParse );
	}

	// Remember who took the lock, so that a later conflicting lock attempt
	// can report the owner's backtrace for easier debugging
	$this->mInParse = ( new RuntimeException )->getTraceAsString();

	return new ScopedCallback( function () {
		$this->mInParse = false;
	} );
}
6519
/**
 * Will entry points such as parse() throw an exception due to the parser
 * already being active?
 *
 * @return bool True while $this->mInParse holds a lock owner's backtrace
 */
public function isLocked() {
	$lockOwner = $this->mInParse;

	return (bool)$lockOwner;
}
6530
/**
 * Strip the outer <p></p> tag from HTML that consists of a single paragraph.
 *
 * The input is returned unchanged unless it is exactly one "<p>...</p>"
 * element (optionally with a newline before and/or after the closing tag).
 *
 * @param string $html
 * @return string The paragraph's inner HTML, or $html unchanged
 */
public static function stripOuterParagraph( $html ) {
	$m = [];
	if ( !preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m ) ) {
		// Not wrapped in <p>...</p> at all
		return $html;
	}

	if ( strpos( $m[1], '</p>' ) !== false ) {
		// More than one paragraph; removing the outer tags would break the markup
		return $html;
	}

	return $m[1];
}
6549
/**
 * Add HTML tags marking the parts of a page title, to be displayed in the
 * first heading of the page.
 *
 * Each part is passed through HtmlArmor::getHtml() before being wrapped in
 * its <span>.
 *
 * @param string|HtmlArmor $nsText Namespace text; when it is the empty
 *   string, the namespace and separator spans are omitted
 * @param string|HtmlArmor $nsSeparator Separator between namespace and main text
 * @param string|HtmlArmor $mainText Main part of the title
 * @return string HTML
 */
public static function formatPageTitle( $nsText, $nsSeparator, $mainText ): string {
	$spans = [];

	if ( $nsText !== '' ) {
		$spans[] = '<span class="mw-page-title-namespace">' . HtmlArmor::getHtml( $nsText ) . '</span>';
		$spans[] = '<span class="mw-page-title-separator">' . HtmlArmor::getHtml( $nsSeparator ) . '</span>';
	}
	$spans[] = '<span class="mw-page-title-main">' . HtmlArmor::getHtml( $mainText ) . '</span>';

	return implode( '', $spans );
}
6569
/**
 * Strip everything but the <body> from the provided string.
 *
 * Removes everything up to and including the first opening <body ...> tag,
 * and a trailing "</body></html>" (with optional whitespace). Input without
 * those tags is returned unchanged.
 *
 * preg_replace() returns null when PCRE fails (for example when the
 * backtrack limit is hit on a very large document). Feeding that null into
 * the next step would silently turn the whole document into an empty string,
 * so each step falls back to its unmodified input instead.
 *
 * @param string $text HTML document or fragment
 * @return string Contents of the <body> element
 */
public static function extractBody( string $text ): string {
	$text = preg_replace( '!^.*?<body[^>]*>!s', '', $text, 1 ) ?? $text;
	$text = preg_replace( '!</body>\s*</html>\s*$!', '', $text, 1 ) ?? $text;

	return $text;
}
6581
/**
 * Sets up the PHP implementation of OOUI for use in this request and flags
 * the current ParserOutput as needing OOUI.
 *
 * @deprecated since 1.35; emits a deprecation warning on every call
 */
public function enableOOUI() {
	wfDeprecated( __METHOD__, '1.35' );

	OutputPage::setupOOUI();

	$parserOutput = $this->mOutput;
	$parserOutput->setEnableOOUI( true );
}
6594
/**
 * Set a flag on the current ParserOutput and write a debug log entry saying
 * which flag was set, on which page, and why.
 *
 * @param string $flag Flag name, passed to ParserOutput::setOutputFlag()
 * @param string $reason Human-readable explanation, used only in the log message
 */
private function setOutputFlag( string $flag, string $reason ): void {
	$this->mOutput->setOutputFlag( $flag );

	$pageName = $this->getTitle()->getPrefixedText();
	$message = __METHOD__ . ": set $flag flag on '" . $pageName . "'; $reason";
	$this->logger->debug( $message );
}
6606}
6607
// Register the unnamespaced name 'Parser' as an alias of MediaWiki\Parser\Parser.
// NOTE(review): presumably kept for code that still refers to the old global class name - confirm.
6609class_alias( Parser::class, 'Parser' );
const OT_WIKI
Definition Defines.php:165
const NS_FILE
Definition Defines.php:71
const NS_MEDIAWIKI
Definition Defines.php:73
const NS_TEMPLATE
Definition Defines.php:75
const NS_SPECIAL
Definition Defines.php:54
const OT_PLAIN
Definition Defines.php:167
const OT_PREPROCESS
Definition Defines.php:166
const OT_HTML
Definition Defines.php:164
const NS_MEDIA
Definition Defines.php:53
const NS_CATEGORY
Definition Defines.php:79
wfEscapeWikiText( $input)
Escapes the given text so that it may be output using addWikiText() without any linking,...
wfDeprecatedMsg( $msg, $version=false, $component=false, $callerOffset=2)
Log a deprecation warning with arbitrary message text.
wfGetUrlUtils()
wfHostname()
Get host name of the current machine, for use in error reporting.
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest. If source is NULL, it just returns the val...
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
Title null $mTitle
array $params
The job parameters.
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:81
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition File.php:79
Marks HTML that shouldn't be escaped.
Definition HtmlArmor.php:30
Class for exceptions thrown by ImageGalleryBase::factory().
Store key-value entries in a size-limited in-memory LRU cache.
Base media handler class.
Helper class for mapping value objects representing basic entities to cache keys.
This class performs some operations related to tracking categories, such as adding a tracking categor...
A class for passing options to services.
assertRequiredOptions(array $expectedKeys)
Assert that the list of options provided in this instance exactly match $expectedKeys,...
Content object implementation for representing flat text.
Group all the pieces relevant to the context of a request into one instance.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Static utilities for manipulating HTML strings.
This class is a collection of static functions that serve two purposes:
Definition Html.php:56
Factory creating MWHttpRequest objects.
Methods for dealing with language codes.
Base class for language-specific code.
Definition Language.php:82
Variant of the Message class.
An interface for creating language converters.
isConversionDisabled()
Whether to disable language variant conversion.
A service that provides utilities to do with language names and codes.
Factory to create LinkRender objects.
Class that generates HTML for internal links.
Some internal bits split off from Skin.php.
Definition Linker.php:62
A class containing constants representing the names of configuration variables.
const EnableParserLimitReporting
Name constant for the EnableParserLimitReporting setting, for use with Config::get()
const MaxSigChars
Name constant for the MaxSigChars setting, for use with Config::get()
const ServerName
Name constant for the ServerName setting, for use with Config::get()
const ParserEnableUserLanguage
Name constant for the ParserEnableUserLanguage setting, for use with Config::get()
const AllowSlowParserFunctions
Name constant for the AllowSlowParserFunctions setting, for use with Config::get()
const AllowDisplayTitle
Name constant for the AllowDisplayTitle setting, for use with Config::get()
const StylePath
Name constant for the StylePath setting, for use with Config::get()
const MaxTocLevel
Name constant for the MaxTocLevel setting, for use with Config::get()
const Localtimezone
Name constant for the Localtimezone setting, for use with Config::get()
const Server
Name constant for the Server setting, for use with Config::get()
const FragmentMode
Name constant for the FragmentMode setting, for use with Config::get()
const EnableScaryTranscluding
Name constant for the EnableScaryTranscluding setting, for use with Config::get()
const ParsoidFragmentSupport
Name constant for the ParsoidFragmentSupport setting, for use with Config::get()
const TranscludeCacheExpiry
Name constant for the TranscludeCacheExpiry setting, for use with Config::get()
const Sitename
Name constant for the Sitename setting, for use with Config::get()
const ArticlePath
Name constant for the ArticlePath setting, for use with Config::get()
const ScriptPath
Name constant for the ScriptPath setting, for use with Config::get()
const ParserEnableLegacyMediaDOM
Name constant for the ParserEnableLegacyMediaDOM setting, for use with Config::get()
const SignatureValidation
Name constant for the SignatureValidation setting, for use with Config::get()
const MiserMode
Name constant for the MiserMode setting, for use with Config::get()
const RawHtml
Name constant for the RawHtml setting, for use with Config::get()
const PreprocessorCacheThreshold
Name constant for the PreprocessorCacheThreshold setting, for use with Config::get()
const ExtraInterlanguageLinkPrefixes
Name constant for the ExtraInterlanguageLinkPrefixes setting, for use with Config::get()
const ShowHostnames
Name constant for the ShowHostnames setting, for use with Config::get()
Service locator for MediaWiki core services.
The Message class deals with fetching and processing of interface message into a variety of formats.
Definition Message.php:155
This is one of the Core classes and should be read at least once by any new developers.
static register(Parser $parser, ServiceOptions $options)
Class for handling an array of magic words.
Store information about magic words, and create/cache MagicWord objects.
static int $inParserFactory
Track calls to Parser constructor to aid in deprecation of direct Parser invocation.
Set options of the Parser.
getMaxIncludeSize()
Maximum size of template expansions, in bytes.
getDisableTitleConversion()
Whether title conversion should be disabled.
getExpensiveParserFunctionLimit()
Maximum number of calls per parse to expensive parser functions.
getMaxPPExpandDepth()
Maximum recursion depth in PPFrame::expand()
getPreSaveTransform()
Transform wiki markup when saving the page?
getMaxPPNodeCount()
Maximum number of nodes touched by PPFrame::expand()
ParserOutput is a rendering of a Content object or a message.
setLimitReportData( $key, $value)
Sets parser limit report data for a key.
getTimeProfile(string $clock)
Returns the time that elapsed between the most recent call to resetParseStartTime() and the first cal...
hasReducedExpiry()
Check whether the cache TTL was lowered from the site default.
getCacheExpiry()
Returns the number of seconds after which this object should expire.
PHP Parser - Processes wiki markup (which uses a more user-friendly syntax, such as "[[link]]" for ma...
Definition Parser.php:148
guessLegacySectionNameFromWikiText( $text)
Same as guessSectionNameFromWikiText(), but produces legacy anchors instead, if possible.
Definition Parser.php:6322
$mExpensiveFunctionCount
Number of expensive parser function calls.
Definition Parser.php:290
callParserFunction(PPFrame $frame, $function, array $args=[], bool $inSolState=false)
Call a parser function and return an array with text and flags.
Definition Parser.php:3444
getTargetLanguageConverter()
Shorthand for getting a Language Converter for Target language.
Definition Parser.php:1650
setOutputType( $ot)
Mutator for the output type.
Definition Parser.php:1076
getBadFileLookup()
Get the BadFileLookup instance that this Parser is using.
Definition Parser.php:1251
stripSectionName( $text)
Strips a text string of wikitext for use in a section anchor.
Definition Parser.php:6375
getDefaultSort()
Accessor for the 'defaultsort' page property.
Definition Parser.php:6268
limitationWarn( $limitationType, $current='', $max='')
Warn the user when a parser limitation is reached. Will warn the user at most once per limitation type...
Definition Parser.php:3003
makeImage(LinkTarget $link, $options, $holders=false)
Parse image options text and use it to make an image.
Definition Parser.php:5451
const OT_PLAIN
Output type: like Parser::extractSections() - portions of the original are returned unchanged.
Definition Parser.php:195
static guessSectionNameFromStrippedText( $text)
Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
Definition Parser.php:6335
static statelessFetchTemplate( $page, $parser=false)
Static function to get a template Can be overridden via ParserOptions::setTemplateCallback().
Definition Parser.php:3705
markerSkipCallback( $s, callable $callback)
Call a callback function on all regions of the given text that are not inside strip markers,...
Definition Parser.php:6413
getPreloadText( $text, PageReference $page, ParserOptions $options, $params=[])
Process the wikitext for the "?preload=" feature.
Definition Parser.php:974
getTemplateDom(LinkTarget $title, bool $inSolState=false)
Get the semi-parsed DOM representation of a template with a given title, and its redirect destination...
Definition Parser.php:3544
getLinkRenderer()
Get a LinkRenderer instance to make links with.
Definition Parser.php:1216
parse( $text, PageReference $page, ParserOptions $options, $linestart=true, $clearState=true, $revid=null)
Convert wikitext to HTML. Do not call this function recursively.
Definition Parser.php:669
tagNeedsNowikiStrippedInTagPF(string $lowerTagName)
Definition Parser.php:4016
doBlockLevels( $text, $linestart)
Make lists from lines starting with ':', '*', '#', etc.
Definition Parser.php:2826
getMagicWordFactory()
Get the MagicWordFactory that this Parser is using.
Definition Parser.php:1231
lock()
Lock the current instance of the parser.
Definition Parser.php:6502
setFunctionHook( $id, callable $callback, $flags=0)
Create a function, e.g.
Definition Parser.php:5119
const EXT_LINK_URL_CLASS
Everything except bracket, space, or control characters.
Definition Parser.php:163
preprocess( $text, ?PageReference $page, ParserOptions $options, $revid=null, $frame=false)
Expand templates and variables in the text, producing valid, static wikitext.
Definition Parser.php:927
firstCallInit()
Used to do various kinds of initialisation on the first call of the parser.
Definition Parser.php:589
guessSectionNameFromWikiText( $text)
Try to guess the section anchor name based on a wikitext fragment presumably extracted from a heading...
Definition Parser.php:6305
getUserSig(UserIdentity $user, $nickname=false, $fancySig=null)
Fetch the user's signature text, if any, and normalize to validated, ready-to-insert wikitext.
Definition Parser.php:4801
replaceVariables( $text, $frame=false, $argsOnly=false, array $options=[])
Replace magic variables, templates, and template arguments with the appropriate text.
Definition Parser.php:2939
interwikiTransclude(LinkTarget $link, $action)
Transclude an interwiki link.
Definition Parser.php:3908
validateSig( $text)
Check that the user's signature contains no bad XML.
Definition Parser.php:4868
isCurrentRevisionOfTitleCached(LinkTarget $link)
Definition Parser.php:3624
getRevisionId()
Get the ID of the revision we are parsing.
Definition Parser.php:6123
renderImageGallery( $text, array $params)
Renders an image gallery from a text with one line per image.
Definition Parser.php:5203
argSubstitution(array $piece, PPFrame $frame)
Triple brace replacement – used for template arguments.
Definition Parser.php:3979
replaceSection( $oldText, $sectionId, $newText)
This function returns $oldtext after the content of the section specified by $section has been replac...
Definition Parser.php:6036
transformMsg( $text, ParserOptions $options, ?PageReference $page=null)
Wrapper for preprocess()
Definition Parser.php:5014
insertStripItem( $text)
Add an item to the strip state. Returns the unique tag which must be inserted into the stripped text. T...
Definition Parser.php:1358
internalParse( $text, $isMain=true, $frame=false)
Helper function for parse() that transforms wiki markup into half-parsed HTML.
Definition Parser.php:1581
static normalizeLinkUrl( $url)
Replace unusual escape codes in a URL with their equivalent characters.
Definition Parser.php:2313
static getExternalLinkRel( $url=false, $title=null)
Get the rel attribute for a particular external link.
Definition Parser.php:2254
replaceLinkHolders(&$text)
Replace "<!--LINK-->" link placeholders with actual links, in the buffer Placeholders created in Link...
Definition Parser.php:5164
static extractTagsAndParams(array $elements, $text, &$matches)
Replaces all occurrences of HTML-style comments and the given tags in the text with a random marker a...
Definition Parser.php:1274
static statelessFetchRevisionRecord(LinkTarget $link, $parser=null)
Wrapper around RevisionLookup::getKnownCurrentRevision.
Definition Parser.php:3640
getHookRunner()
Get a HookRunner for calling core hooks.
Definition Parser.php:1684
getContentLanguage()
Get the content language that this Parser is using.
Definition Parser.php:1241
getExternalLinkAttribs( $url)
Get an associative array of additional HTML attributes appropriate for a particular external link.
Definition Parser.php:2280
parseWidthParam( $value, $parseHeight=true, bool $localized=false)
Parses a width parameter of an image link, such as 300px or 200x300px.
Definition Parser.php:6461
setPage(?PageReference $t=null)
Set the page used as context for parsing, e.g.
Definition Parser.php:1026
setOptions(ParserOptions $options)
Mutator for the ParserOptions object.
Definition Parser.php:1126
preSaveTransform( $text, PageReference $page, UserIdentity $user, ParserOptions $options, $clearState=true)
Transform wiki markup when saving a page by doing "\\r\\n" -> "\\n" conversion, substituting signatur...
Definition Parser.php:4680
killMarkers( $text)
Remove any strip markers found in the given text.
Definition Parser.php:6444
const OT_PREPROCESS
Output type: like Parser::preprocess()
Definition Parser.php:190
cleanSig( $text, $parsing=false)
Clean up signature text.
Definition Parser.php:4883
isLocked()
Will entry points such as parse() throw an exception due to the parser already being active?
Definition Parser.php:6527
getRevisionUser()
Get the name of the user that edited the last revision.
Definition Parser.php:6215
getFlatSectionInfo( $text)
Get an array of preprocessor section information.
Definition Parser.php:6069
getTargetLanguage()
Get the target language for the content being parsed.
Definition Parser.php:1177
clearState()
Clear Parser state.
Definition Parser.php:601
getFunctionHooks()
Get all registered function hook identifiers.
Definition Parser.php:5153
msg(string $msg,... $params)
Helper function to correctly set the target language and title of a message based on the parser conte...
Definition Parser.php:4247
enableOOUI()
Sets up the PHP implementation of OOUI for use in this request and instructs OutputPage to enable OO...
Definition Parser.php:6589
braceSubstitution(array $piece, PPFrame $frame)
Return the text of a template, after recursively replacing any variables or templates within the temp...
Definition Parser.php:3028
getUserIdentity()
Get a user either from the user set on Parser if it's set, or from the ParserOptions object otherwise...
Definition Parser.php:1196
static formatPageTitle( $nsText, $nsSeparator, $mainText)
Add HTML tags marking the parts of a page title, to be displayed in the first heading of the page.
Definition Parser.php:6560
makeLimitReport(ParserOptions $parserOptions, ParserOutput $parserOutput)
Set the limit report data in the current ParserOutput.
Definition Parser.php:769
setUser(?UserIdentity $user)
Set the current user.
Definition Parser.php:996
Options( $x=null)
Accessor/mutator for the ParserOptions object.
Definition Parser.php:1137
getHookContainer()
Get a HookContainer capable of returning metadata about hooks or running extension hooks.
Definition Parser.php:1672
getOutputType()
Accessor for the output type.
Definition Parser.php:1067
recursivePreprocess( $text, $frame=false)
Recursive parser entry point that can be called from an extension tag hook.
Definition Parser.php:954
setTitle(?Title $t=null)
Set the context title.
Definition Parser.php:1007
getRevisionSize()
Get the size of the revision.
Definition Parser.php:6239
getPreprocessor()
Get a preprocessor object.
Definition Parser.php:1206
getStripList()
Get a list of strippable XML-like elements.
Definition Parser.php:1337
extensionSubstitution(array $params, PPFrame $frame, bool $processNowiki=false)
Return the text to be used for a given extension tag.
Definition Parser.php:4040
setHook( $tag, callable $callback)
Create an HTML-style tag, e.g.
Definition Parser.php:5048
preprocessToDom( $text, $flags=0)
Get the document object model for the given wikitext.
Definition Parser.php:2906
getSection( $text, $sectionId, $defaultText='')
This function returns the text of a section, specified by a number ($section).
Definition Parser.php:6019
const OT_WIKI
Output type: like Parser::preSaveTransform()
Definition Parser.php:188
fetchTemplateAndTitle(LinkTarget $link)
Fetch the unparsed text of a template and register a reference to it.
Definition Parser.php:3663
static stripOuterParagraph( $html)
Strip the outer <p></p> tag from the HTML source of a single paragraph.
Definition Parser.php:6541
getRevisionRecordObject()
Get the revision record object for $this->mRevisionId.
Definition Parser.php:6133
parseExtensionTagAsTopLevelDoc( $text)
Needed by Parsoid/PHP to ensure all the hooks for extensions are run in the right order.
Definition Parser.php:908
OutputType( $x=null)
Accessor/mutator for the output type.
Definition Parser.php:1094
clearTagHooks()
Remove all tag hooks.
Definition Parser.php:5066
modifyImageHtml(File $file, array $params, string &$html)
Give hooks a chance to modify image thumbnail HTML.
Definition Parser.php:5754
static extractBody(string $text)
Strip everything but the <body> from the provided string.
Definition Parser.php:6576
getRevisionTimestamp()
Get the timestamp associated with the current revision, adjusted for the default server-local timesta...
Definition Parser.php:6187
__clone()
Allow extensions to clean up when the parser is cloned.
Definition Parser.php:573
static cleanSigInSig( $text)
Strip 3, 4 or 5 tildes out of signatures.
Definition Parser.php:4925
__construct(ServiceOptions $svcOptions, MagicWordFactory $magicWordFactory, Language $contLang, ParserFactory $factory, UrlUtils $urlUtils, SpecialPageFactory $spFactory, LinkRendererFactory $linkRendererFactory, NamespaceInfo $nsInfo, LoggerInterface $logger, BadFileLookup $badFileLookup, LanguageConverterFactory $languageConverterFactory, LanguageNameUtils $languageNameUtils, HookContainer $hookContainer, TidyDriverBase $tidy, WANObjectCache $wanCache, UserOptionsLookup $userOptionsLookup, UserFactory $userFactory, TitleFormatter $titleFormatter, HttpRequestFactory $httpRequestFactory, TrackingCategories $trackingCategories, SignatureValidatorFactory $signatureValidatorFactory, UserNameUtils $userNameUtils)
Constructing parsers directly is not allowed! Use a ParserFactory.
Definition Parser.php:460
attributeStripCallback(&$text, $frame=false)
Callback from the Sanitizer for expanding items found in HTML attribute values, so they can be safely...
Definition Parser.php:5825
__destruct()
Reduce memory usage to reduce the impact of circular references.
Definition Parser.php:558
recursiveTagParse( $text, $frame=false)
Half-parse wikitext to half-parsed HTML.
Definition Parser.php:859
doQuotes( $text)
Helper function for handleAllQuotes()
Definition Parser.php:1984
static replaceTableOfContentsMarker( $text, $toc)
Replace table of contents marker in parsed HTML.
Definition Parser.php:4946
const OT_HTML
Output type: like Parser::parse()
Definition Parser.php:186
recursiveTagParseFully( $text, $frame=false)
Fully parse wikitext to fully parsed HTML.
Definition Parser.php:883
fetchFileNoRegister(LinkTarget $link, array $options=[])
Helper function for fetchFileAndTitle.
Definition Parser.php:3884
getPage()
Returns the page used as context for parsing, e.g.
Definition Parser.php:1049
fetchFileAndTitle(LinkTarget $link, array $options=[])
Fetch a file and its title and register a reference to it.
Definition Parser.php:3857
fetchCurrentRevisionRecordOfTitle(LinkTarget $link)
Fetch the current revision of a given title as a RevisionRecord.
Definition Parser.php:3595
startExternalParse(?PageReference $page, ParserOptions $options, $outputType, $clearState=true, $revId=null)
Set up some variables which are usually set up in parse() so that an external function can call some class members with confidence.
Definition Parser.php:4979
getFunctionLang()
Get a language object for use in parser functions such as {{FORMATNUM:}}.
Definition Parser.php:1164
resetOutput()
Reset the ParserOutput.
Definition Parser.php:646
Differences from DOM schema:
const DOM_FOR_INCLUSION
Transclusion mode flag for Preprocessor::preprocessToObj()
static removeSomeTags(string $text, array $options=[])
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments; the result will always be balanced and tidy HTML.
WebRequest clone which takes values from a provided array.
Exception representing a failure to look up a revision.
Page revision base class.
Value object representing a content slot associated with a page revision.
Factory for handling the special page list and generating SpecialPage objects.
Parent class for all special pages.
Base class for HTML cleanup utilities.
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
A codec for MediaWiki page titles.
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them based on index.
Represents the target of a wiki link.
Represents a title within MediaWiki.
Definition Title.php:78
Provides access to user options.
Create User objects.
UserNameUtils service.
User class for the MediaWiki software.
Definition User.php:119
Library for creating and parsing MW-style timestamps.
A service to expand, parse, and otherwise manipulate URLs.
Definition UrlUtils.php:16
validProtocols()
Returns a partial regular expression of recognized URL protocols, e.g.
Definition UrlUtils.php:353
Module of static functions for generating XML.
Definition Xml.php:37
Arbitrary section name based PHP profiling.
A collection of static methods to play with strings.
static replaceMarkup( $search, $replace, $text)
More or less "markup-safe" str_replace(). Ignores any instances of the separator inside <...>.
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
Value object representing a message parameter that consists of a list of values.
Multi-datacenter aware caching interface.
return[0=> 'ـ', 1=> ' ', 2=> '`', 3=> '´', 4=> '˜', 5=> '^', 6=> '¯', 7=> '‾', 8=> '˘', 9=> '˙', 10=> '¨', 11=> '˚', 12=> '˝', 13=> '᾽', 14=> '῝', 15=> '¸', 16=> '˛', 17=> '_', 18=> '‗', 19=> '῀', 20=> '﮲', 21=> '﮳', 22=> '﮴', 23=> '﮵', 24=> '﮶', 25=> '﮷', 26=> '﮸', 27=> '﮹', 28=> '﮺', 29=> '﮻', 30=> '﮼', 31=> '﮽', 32=> '﮾', 33=> '﮿', 34=> '﯀', 35=> '﯁', 36=> '゛', 37=> '゜', 38=> '-', 39=> '֊', 40=> '᐀', 41=> '᭠', 42=> '᠆', 43=> '᠇', 44=> '‐', 45=> '‒', 46=> '–', 47=> '—', 48=> '―', 49=> '⁓', 50=> '⸗', 51=> '゠', 52=> '・', 53=> ',', 54=> '՝', 55=> '،', 56=> '؍', 57=> '٫', 58=> '٬', 59=> '߸', 60=> '᠂', 61=> '᠈', 62=> '꓾', 63=> '꘍', 64=> '꛵', 65=> '︑', 66=> ';', 67=> '؛', 68=> '⁏', 69=> '꛶', 70=> ':', 71=> '։', 72=> '؞', 73=> '܃', 74=> '܄', 75=> '܅', 76=> '܆', 77=> '܇', 78=> '܈', 79=> '࠰', 80=> '࠱', 81=> '࠲', 82=> '࠳', 83=> '࠴', 84=> '࠵', 85=> '࠶', 86=> '࠷', 87=> '࠸', 88=> '࠹', 89=> '࠺', 90=> '࠻', 91=> '࠼', 92=> '࠽', 93=> '࠾', 94=> '፡', 95=> '፣', 96=> '፤', 97=> '፥', 98=> '፦', 99=> '᠄', 100=> '᠅', 101=> '༔', 102=> '៖', 103=> '᭝', 104=> '꧇', 105=> '᛫', 106=> '᛬', 107=> '᛭', 108=> '꛴', 109=> '!', 110=> '¡', 111=> '՜', 112=> '߹', 113=> '᥄', 114=> '?', 115=> '¿', 116=> '⸮', 117=> '՞', 118=> '؟', 119=> '܉', 120=> '፧', 121=> '᥅', 122=> '⳺', 123=> '⳻', 124=> '꘏', 125=> '꛷', 126=> '‽', 127=> '⸘', 128=> '.', 129=> '᠁', 130=> '۔', 131=> '܁', 132=> '܂', 133=> '።', 134=> '᠃', 135=> '᠉', 136=> '᙮', 137=> '᭜', 138=> '⳹', 139=> '⳾', 140=> '⸰', 141=> '꓿', 142=> '꘎', 143=> '꛳', 144=> '︒', 145=> '·', 146=> '⸱', 147=> '।', 148=> '॥', 149=> '꣎', 150=> '꣏', 151=> '᰻', 152=> '᰼', 153=> '꡶', 154=> '꡷', 155=> '᜵', 156=> '᜶', 157=> '꤯', 158=> '၊', 159=> '။', 160=> '។', 161=> '៕', 162=> '᪨', 163=> '᪩', 164=> '᪪', 165=> '᪫', 166=> '᭞', 167=> '᭟', 168=> '꧈', 169=> '꧉', 170=> '꩝', 171=> '꩞', 172=> '꩟', 173=> '꯫', 174=> '𐩖', 175=> '𐩗', 176=> '𑁇', 177=> '𑁈', 178=> '𑃀', 179=> '𑃁', 180=> '᱾', 181=> '᱿', 182=> '܀', 183=> '߷', 184=> '჻', 185=> '፠', 186=> '፨', 187=> '᨞', 188=> '᨟', 189=> '᭚', 190=> '᭛', 
191=> '꧁', 192=> '꧂', 193=> '꧃', 194=> '꧄', 195=> '꧅', 196=> '꧆', 197=> '꧊', 198=> '꧋', 199=> '꧌', 200=> '꧍', 201=> '꛲', 202=> '꥟', 203=> '𐡗', 204=> '𐬺', 205=> '𐬻', 206=> '𐬼', 207=> '𐬽', 208=> '𐬾', 209=> '𐬿', 210=> '𑂾', 211=> '𑂿', 212=> '⁕', 213=> '⁖', 214=> '⁘', 215=> '⁙', 216=> '⁚', 217=> '⁛', 218=> '⁜', 219=> '⁝', 220=> '⁞', 221=> '⸪', 222=> '⸫', 223=> '⸬', 224=> '⸭', 225=> '⳼', 226=> '⳿', 227=> '⸙', 228=> '𐤿', 229=> '𐄀', 230=> '𐄁', 231=> '𐄂', 232=> '𐎟', 233=> '𐏐', 234=> '𐤟', 235=> '𒑰', 236=> '𒑱', 237=> '𒑲', 238=> '𒑳', 239=> '\'', 240=> '‘', 241=> '’', 242=> '‚', 243=> '‛', 244=> '‹', 245=> '›', 246=> '"', 247 => '“', 248 => '”', 249 => '„', 250 => '‟', 251 => '«', 252 => '»', 253 => '(', 254 => ')', 255 => '[', 256 => ']', 257 => '{', 258 => '}', 259 => '༺', 260 => '༻', 261 => '༼', 262 => '༽', 263 => '᚛', 264 => '᚜', 265 => '⁅', 266 => '⁆', 267 => '⧼', 268 => '⧽', 269 => '⦃', 270 => '⦄', 271 => '⦅', 272 => '⦆', 273 => '⦇', 274 => '⦈', 275 => '⦉', 276 => '⦊', 277 => '⦋', 278 => '⦌', 279 => '⦍', 280 => '⦎', 281 => '⦏', 282 => '⦐', 283 => '⦑', 284 => '⦒', 285 => '⦓', 286 => '⦔', 287 => '⦕', 288 => '⦖', 289 => '⦗', 290 => '⦘', 291 => '⟬', 292 => '⟭', 293 => '⟮', 294 => '⟯', 295 => '⸂', 296 => '⸃', 297 => '⸄', 298 => '⸅', 299 => '⸉', 300 => '⸊', 301 => '⸌', 302 => '⸍', 303 => '⸜', 304 => '⸝', 305 => '⸠', 306 => '⸡', 307 => '⸢', 308 => '⸣', 309 => '⸤', 310 => '⸥', 311 => '⸦', 312 => '⸧', 313 => '⸨', 314 => '⸩', 315 => '〈', 316 => '〉', 317 => '「', 318 => '」', 319 => '﹝', 320 => '﹞', 321 => '︗', 322 => '︘', 323 => '﴾', 324 => '﴿', 325 => '§', 326 => '¶', 327 => '⁋', 328 => '©', 329 => '®', 330 => '@', 331 => '*', 332 => '⁎', 333 => '⁑', 334 => '٭', 335 => '꙳', 336 => '/', 337 => '⁄', 338 => '\\', 339 => '&', 340 => '⅋', 341 => '⁊', 342 => '#', 343 => '%', 344 => '٪', 345 => '‰', 346 => '؉', 347 => '‱', 348 => '؊', 349 => '⁒', 350 => '†', 351 => '‡', 352 => '•', 353 => '‣', 354 => '‧', 355 => '⁃', 356 => '⁌', 357 => '⁍', 358 => '′', 359 => '‵', 360 => '‸', 361 => '※', 
362 => '‿', 363 => '⁔', 364 => '⁀', 365 => '⁐', 366 => '⁁', 367 => '⁂', 368 => '⸀', 369 => '⸁', 370 => '⸆', 371 => '⸇', 372 => '⸈', 373 => '⸋', 374 => '⸎', 375 => '⸏', 376 => '⸐', 377 => '⸑', 378 => '⸒', 379 => '⸓', 380 => '⸔', 381 => '⸕', 382 => '⸖', 383 => '⸚', 384 => '⸛', 385 => '⸞', 386 => '⸟', 387 => '꙾', 388 => '՚', 389 => '՛', 390 => '՟', 391 => '־', 392 => '׀', 393 => '׃', 394 => '׆', 395 => '׳', 396 => '״', 397 => '܊', 398 => '܋', 399 => '܌', 400 => '܍', 401 => '࡞', 402 => '᠀', 403 => '॰', 404 => '꣸', 405 => '꣹', 406 => '꣺', 407 => '෴', 408 => '๚', 409 => '๛', 410 => '꫞', 411 => '꫟', 412 => '༄', 413 => '༅', 414 => '༆', 415 => '༇', 416 => '༈', 417 => '༉', 418 => '༊', 419 => '࿐', 420 => '࿑', 421 => '་', 422 => '།', 423 => '༎', 424 => '༏', 425 => '༐', 426 => '༑', 427 => '༒', 428 => '྅', 429 => '࿒', 430 => '࿓', 431 => '࿔', 432 => '࿙', 433 => '࿚', 434 => '᰽', 435 => '᰾', 436 => '᰿', 437 => '᥀', 438 => '၌', 439 => '၍', 440 => '၎', 441 => '၏', 442 => '႞', 443 => '႟', 444 => '꩷', 445 => '꩸', 446 => '꩹', 447 => 'ៗ', 448 => '៘', 449 => '៙', 450 => '៚', 451 => '᪠', 452 => '᪡', 453 => '᪢', 454 => '᪣', 455 => '᪤', 456 => '᪥', 457 => '᪦', 458 => '᪬', 459 => '᪭', 460 => '᙭', 461 => '⵰', 462 => '꡴', 463 => '꡵', 464 => '᯼', 465 => '᯽', 466 => '᯾', 467 => '᯿', 468 => '꤮', 469 => '꧞', 470 => '꧟', 471 => '꩜', 472 => '𑁉', 473 => '𑁊', 474 => '𑁋', 475 => '𑁌', 476 => '𑁍', 477 => '𐩐', 478 => '𐩑', 479 => '𐩒', 480 => '𐩓', 481 => '𐩔', 482 => '𐩕', 483 => '𐩘', 484 => '𐬹', 485 => '𑂻', 486 => '𑂼', 487 => 'ʹ', 488 => '͵', 489 => 'ʺ', 490 => '˂', 491 => '˃', 492 => '˄', 493 => '˅', 494 => 'ˆ', 495 => 'ˇ', 496 => 'ˈ', 497 => 'ˉ', 498 => 'ˊ', 499 => 'ˋ', 500 => 'ˌ', 501 => 'ˍ', 502 => 'ˎ', 503 => 'ˏ', 504 => '˒', 505 => '˓', 506 => '˔', 507 => '˕', 508 => '˖', 509 => '˗', 510 => '˞', 511 => '˟', 512 => '˥', 513 => '˦', 514 => '˧', 515 => '˨', 516 => '˩', 517 => '˪', 518 => '˫', 519 => 'ˬ', 520 => '˭', 521 => '˯', 522 => '˰', 523 => '˱', 524 => '˲', 525 => '˳', 526 => '˴', 527 => '˵', 528 => 
'˶', 529 => '˷', 530 => '˸', 531 => '˹', 532 => '˺', 533 => '˻', 534 => '˼', 535 => '˽', 536 => '˾', 537 => '˿', 538 => '᎐', 539 => '᎑', 540 => '᎒', 541 => '᎓', 542 => '᎔', 543 => '᎕', 544 => '᎖', 545 => '᎗', 546 => '᎘', 547 => '᎙', 548 => '꜀', 549 => '꜁', 550 => '꜂', 551 => '꜃', 552 => '꜄', 553 => '꜅', 554 => '꜆', 555 => '꜇', 556 => '꜈', 557 => '꜉', 558 => '꜊', 559 => '꜋', 560 => '꜌', 561 => '꜍', 562 => '꜎', 563 => '꜏', 564 => '꜐', 565 => '꜑', 566 => '꜒', 567 => '꜓', 568 => '꜔', 569 => '꜕', 570 => '꜖', 571 => 'ꜗ', 572 => 'ꜘ', 573 => 'ꜙ', 574 => 'ꜚ', 575 => 'ꜛ', 576 => 'ꜜ', 577 => 'ꜝ', 578 => 'ꜞ', 579 => 'ꜟ', 580 => '꜠', 581 => '꜡', 582 => 'ꞈ', 583 => '꞉', 584 => '꞊', 585 => '°', 586 => '҂', 587 => '؈', 588 => '؎', 589 => '؏', 590 => '۞', 591 => '۩', 592 => '﷽', 593 => '߶', 594 => '৺', 595 => '୰', 596 => '௳', 597 => '௴', 598 => '௵', 599 => '௶', 600 => '௷', 601 => '௸', 602 => '௺', 603 => '౿', 604 => '൹', 605 => '꠨', 606 => '꠩', 607 => '꠪', 608 => '꠫', 609 => '꠶', 610 => '꠷', 611 => '꠹', 612 => '๏', 613 => '༁', 614 => '༂', 615 => '༃', 616 => '༓', 617 => '༕', 618 => '༖', 619 => '༗', 620 => '༚', 621 => '༛', 622 => '༜', 623 => '༝', 624 => '༞', 625 => '༟', 626 => '༴', 627 => '༶', 628 => '༸', 629 => '྾', 630 => '྿', 631 => '࿀', 632 => '࿁', 633 => '࿂', 634 => '࿃', 635 => '࿄', 636 => '࿅', 637 => '࿇', 638 => '࿈', 639 => '࿉', 640 => '࿊', 641 => '࿋', 642 => '࿌', 643 => '࿎', 644 => '࿏', 645 => '࿕', 646 => '࿖', 647 => '࿗', 648 => '࿘', 649 => '᧠', 650 => '᧡', 651 => '᧢', 652 => '᧣', 653 => '᧤', 654 => '᧥', 655 => '᧦', 656 => '᧧', 657 => '᧨', 658 => '᧩', 659 => '᧪', 660 => '᧫', 661 => '᧬', 662 => '᧭', 663 => '᧮', 664 => '᧯', 665 => '᧰', 666 => '᧱', 667 => '᧲', 668 => '᧳', 669 => '᧴', 670 => '᧵', 671 => '᧶', 672 => '᧷', 673 => '᧸', 674 => '᧹', 675 => '᧺', 676 => '᧻', 677 => '᧼', 678 => '᧽', 679 => '᧾', 680 => '᧿', 681 => '᭡', 682 => '᭢', 683 => '᭣', 684 => '᭤', 685 => '᭥', 686 => '᭦', 687 => '᭧', 688 => '᭨', 689 => '᭩', 690 => '᭪', 691 => '᭴', 692 => '᭵', 693 => '᭶', 694 => '᭷', 
695 => '᭸', 696 => '᭹', 697 => '᭺', 698 => '᭻', 699 => '᭼', 700 => '℄', 701 => '℈', 702 => '℔', 703 => '℗', 704 => '℘', 705 => '℞', 706 => '℟', 707 => '℣', 708 => '℥', 709 => '℧', 710 => '℩', 711 => '℮', 712 => '℺', 713 => '⅁', 714 => '⅂', 715 => '⅃', 716 => '⅄', 717 => '⅊', 718 => '⅌', 719 => '⅍', 720 => '⅏', 721 => '←', 722 => '→', 723 => '↑', 724 => '↓', 725 => '↔', 726 => '↕', 727 => '↖', 728 => '↗', 729 => '↘', 730 => '↙', 731 => '↜', 732 => '↝', 733 => '↞', 734 => '↟', 735 => '↠', 736 => '↡', 737 => '↢', 738 => '↣', 739 => '↤', 740 => '↥', 741 => '↦', 742 => '↧', 743 => '↨', 744 => '↩', 745 => '↪', 746 => '↫', 747 => '↬', 748 => '↭', 749 => '↯', 750 => '↰', 751 => '↱', 752 => '↲', 753 => '↳', 754 => '↴', 755 => '↵', 756 => '↶', 757 => '↷', 758 => '↸', 759 => '↹', 760 => '↺', 761 => '↻', 762 => '↼', 763 => '↽', 764 => '↾', 765 => '↿', 766 => '⇀', 767 => '⇁', 768 => '⇂', 769 => '⇃', 770 => '⇄', 771 => '⇅', 772 => '⇆', 773 => '⇇', 774 => '⇈', 775 => '⇉', 776 => '⇊', 777 => '⇋', 778 => '⇌', 779 => '⇐', 780 => '⇑', 781 => '⇒', 782 => '⇓', 783 => '⇔', 784 => '⇕', 785 => '⇖', 786 => '⇗', 787 => '⇘', 788 => '⇙', 789 => '⇚', 790 => '⇛', 791 => '⇜', 792 => '⇝', 793 => '⇞', 794 => '⇟', 795 => '⇠', 796 => '⇡', 797 => '⇢', 798 => '⇣', 799 => '⇤', 800 => '⇥', 801 => '⇦', 802 => '⇧', 803 => '⇨', 804 => '⇩', 805 => '⇪', 806 => '⇫', 807 => '⇬', 808 => '⇭', 809 => '⇮', 810 => '⇯', 811 => '⇰', 812 => '⇱', 813 => '⇲', 814 => '⇳', 815 => '⇴', 816 => '⇵', 817 => '⇶', 818 => '⇷', 819 => '⇸', 820 => '⇹', 821 => '⇺', 822 => '⇻', 823 => '⇼', 824 => '⇽', 825 => '⇾', 826 => '⇿', 827 => '∀', 828 => '∁', 829 => '∂', 830 => '∃', 831 => '∅', 832 => '∆', 833 => '∇', 834 => '∈', 835 => '∊', 836 => '∋', 837 => '∍', 838 => '϶', 839 => '∎', 840 => '∏', 841 => '∐', 842 => '∑', 843 => '+', 844 => '±', 845 => '÷', 846 => '×', 847 => '<', 848 => '=', 849 => '>', 850 => '¬', 851 => '|', 852 => '¦', 853 => '‖', 854 => '~', 855 => '−', 856 => '∓', 857 => '∔', 858 => '∕', 859 => '∖', 860 => '∗', 861 => 
'∘', 862 => '∙', 863 => '√', 864 => '∛', 865 => '؆', 866 => '∜', 867 => '؇', 868 => '∝', 869 => '∞', 870 => '∟', 871 => '∠', 872 => '∡', 873 => '∢', 874 => '∣', 875 => '∥', 876 => '∧', 877 => '∨', 878 => '∩', 879 => '∪', 880 => '∫', 881 => '∮', 882 => '∱', 883 => '∲', 884 => '∳', 885 => '∴', 886 => '∵', 887 => '∶', 888 => '∷', 889 => '∸', 890 => '∹', 891 => '∺', 892 => '∻', 893 => '∼', 894 => '∽', 895 => '∾', 896 => '∿', 897 => '≀', 898 => '≂', 899 => '≃', 900 => '≅', 901 => '≆', 902 => '≈', 903 => '≊', 904 => '≋', 905 => '≌', 906 => '≍', 907 => '≎', 908 => '≏', 909 => '≐', 910 => '≑', 911 => '≒', 912 => '≓', 913 => '≔', 914 => '≕', 915 => '≖', 916 => '≗', 917 => '≘', 918 => '≙', 919 => '≚', 920 => '≛', 921 => '≜', 922 => '≝', 923 => '≞', 924 => '≟', 925 => '≡', 926 => '≣', 927 => '≤', 928 => '≥', 929 => '≦', 930 => '≧', 931 => '≨', 932 => '≩', 933 => '≪', 934 => '≫', 935 => '≬', 936 => '≲', 937 => '≳', 938 => '≶', 939 => '≷', 940 => '≺', 941 => '≻', 942 => '≼', 943 => '≽', 944 => '≾', 945 => '≿', 946 => '⊂', 947 => '⊃', 948 => '⊆', 949 => '⊇', 950 => '⊊', 951 => '⊋', 952 => '⊌', 953 => '⊍', 954 => '⊎', 955 => '⊏', 956 => '⊐', 957 => '⊑', 958 => '⊒', 959 => '⊓', 960 => '⊔', 961 => '⊕', 962 => '⊖', 963 => '⊗', 964 => '⊘', 965 => '⊙', 966 => '⊚', 967 => '⊛', 968 => '⊜', 969 => '⊝', 970 => '⊞', 971 => '⊟', 972 => '⊠', 973 => '⊡', 974 => '⊢', 975 => '⊣', 976 => '⊤', 977 => '⊥', 978 => '⊦', 979 => '⊧', 980 => '⊨', 981 => '⊩', 982 => '⊪', 983 => '⊫', 984 => '⊰', 985 => '⊱', 986 => '⊲', 987 => '⊳', 988 => '⊴', 989 => '⊵', 990 => '⊶', 991 => '⊷', 992 => '⊸', 993 => '⊹', 994 => '⊺', 995 => '⊻', 996 => '⊼', 997 => '⊽', 998 => '⊾', 999 => '⊿', 1000 => '⋀', 1001 => '⋁', 1002 => '⋂', 1003 => '⋃', 1004 => '⋄', 1005 => '⋅', 1006 => '⋆', 1007 => '⋇', 1008 => '⋈', 1009 => '⋉', 1010 => '⋊', 1011 => '⋋', 1012 => '⋌', 1013 => '⋍', 1014 => '⋎', 1015 => '⋏', 1016 => '⋐', 1017 => '⋑', 1018 => '⋒', 1019 => '⋓', 1020 => '⋔', 1021 => '⋕', 1022 => '⋖', 1023 => '⋗', 1024 => '⋘', 1025 => '⋙', 
1026 => '⋚', 1027 => '⋛', 1028 => '⋜', 1029 => '⋝', 1030 => '⋞', 1031 => '⋟', 1032 => '⋤', 1033 => '⋥', 1034 => '⋦', 1035 => '⋧', 1036 => '⋨', 1037 => '⋩', 1038 => '⋮', 1039 => '⋯', 1040 => '⋰', 1041 => '⋱', 1042 => '⋲', 1043 => '⋳', 1044 => '⋴', 1045 => '⋵', 1046 => '⋶', 1047 => '⋷', 1048 => '⋸', 1049 => '⋹', 1050 => '⋺', 1051 => '⋻', 1052 => '⋼', 1053 => '⋽', 1054 => '⋾', 1055 => '⋿', 1056 => '⌀', 1057 => '⌁', 1058 => '⌂', 1059 => '⌃', 1060 => '⌄', 1061 => '⌅', 1062 => '⌆', 1063 => '⌇', 1064 => '⌈', 1065 => '⌉', 1066 => '⌊', 1067 => '⌋', 1068 => '⌌', 1069 => '⌍', 1070 => '⌎', 1071 => '⌏', 1072 => '⌐', 1073 => '⌑', 1074 => '⌒', 1075 => '⌓', 1076 => '⌔', 1077 => '⌕', 1078 => '⌖', 1079 => '⌗', 1080 => '⌘', 1081 => '⌙', 1082 => '⌚', 1083 => '⌛', 1084 => '⌜', 1085 => '⌝', 1086 => '⌞', 1087 => '⌟', 1088 => '⌠', 1089 => '⌡', 1090 => '⌢', 1091 => '⌣', 1092 => '⌤', 1093 => '⌥', 1094 => '⌦', 1095 => '⌧', 1096 => '⌨', 1097 => '⌫', 1098 => '⌬', 1099 => '⌭', 1100 => '⌮', 1101 => '⌯', 1102 => '⌰', 1103 => '⌱', 1104 => '⌲', 1105 => '⌳', 1106 => '⌴', 1107 => '⌵', 1108 => '⌶', 1109 => '⌷', 1110 => '⌸', 1111 => '⌹', 1112 => '⌺', 1113 => '⌻', 1114 => '⌼', 1115 => '⌽', 1116 => '⌾', 1117 => '⌿', 1118 => '⍀', 1119 => '⍁', 1120 => '⍂', 1121 => '⍃', 1122 => '⍄', 1123 => '⍅', 1124 => '⍆', 1125 => '⍇', 1126 => '⍈', 1127 => '⍉', 1128 => '⍊', 1129 => '⍋', 1130 => '⍌', 1131 => '⍍', 1132 => '⍎', 1133 => '⍏', 1134 => '⍐', 1135 => '⍑', 1136 => '⍒', 1137 => '⍓', 1138 => '⍔', 1139 => '⍕', 1140 => '⍖', 1141 => '⍗', 1142 => '⍘', 1143 => '⍙', 1144 => '⍚', 1145 => '⍛', 1146 => '⍜', 1147 => '⍝', 1148 => '⍞', 1149 => '⍟', 1150 => '⍠', 1151 => '⍡', 1152 => '⍢', 1153 => '⍣', 1154 => '⍤', 1155 => '⍥', 1156 => '⍦', 1157 => '⍧', 1158 => '⍨', 1159 => '⍩', 1160 => '⍪', 1161 => '⍫', 1162 => '⍬', 1163 => '⍭', 1164 => '⍮', 1165 => '⍯', 1166 => '⍰', 1167 => '⍱', 1168 => '⍲', 1169 => '⍳', 1170 => '⍴', 1171 => '⍵', 1172 => '⍶', 1173 => '⍷', 1174 => '⍸', 1175 => '⍹', 1176 => '⍺', 1177 => '⍻', 1178 => '⍼', 1179 => 
'⍽', 1180 => '⍾', 1181 => '⍿', 1182 => '⎀', 1183 => '⎁', 1184 => '⎂', 1185 => '⎃', 1186 => '⎄', 1187 => '⎅', 1188 => '⎆', 1189 => '⎇', 1190 => '⎈', 1191 => '⎉', 1192 => '⎊', 1193 => '⎋', 1194 => '⎌', 1195 => '⎍', 1196 => '⎎', 1197 => '⎏', 1198 => '⎐', 1199 => '⎑', 1200 => '⎒', 1201 => '⎓', 1202 => '⎔', 1203 => '⎕', 1204 => '⎖', 1205 => '⎗', 1206 => '⎘', 1207 => '⎙', 1208 => '⎚', 1209 => '⎛', 1210 => '⎜', 1211 => '⎝', 1212 => '⎞', 1213 => '⎟', 1214 => '⎠', 1215 => '⎡', 1216 => '⎢', 1217 => '⎣', 1218 => '⎤', 1219 => '⎥', 1220 => '⎦', 1221 => '⎧', 1222 => '⎨', 1223 => '⎩', 1224 => '⎪', 1225 => '⎫', 1226 => '⎬', 1227 => '⎭', 1228 => '⎮', 1229 => '⎯', 1230 => '⎰', 1231 => '⎱', 1232 => '⎲', 1233 => '⎳', 1234 => '⎴', 1235 => '⎵', 1236 => '⎶', 1237 => '⎷', 1238 => '⎸', 1239 => '⎹', 1240 => '⎺', 1241 => '⎻', 1242 => '⎼', 1243 => '⎽', 1244 => '⎾', 1245 => '⎿', 1246 => '⏀', 1247 => '⏁', 1248 => '⏂', 1249 => '⏃', 1250 => '⏄', 1251 => '⏅', 1252 => '⏆', 1253 => '⏇', 1254 => '⏈', 1255 => '⏉', 1256 => '⏊', 1257 => '⏋', 1258 => '⏌', 1259 => '⏍', 1260 => '⏎', 1261 => '⏏', 1262 => '⏐', 1263 => '⏑', 1264 => '⏒', 1265 => '⏓', 1266 => '⏔', 1267 => '⏕', 1268 => '⏖', 1269 => '⏗', 1270 => '⏘', 1271 => '⏙', 1272 => '⏚', 1273 => '⏛', 1274 => '⏜', 1275 => '⏝', 1276 => '⏞', 1277 => '⏟', 1278 => '⏠', 1279 => '⏡', 1280 => '⏢', 1281 => '⏣', 1282 => '⏤', 1283 => '⏥', 1284 => '⏦', 1285 => '⏧', 1286 => '⏨', 1287 => '⏩', 1288 => '⏪', 1289 => '⏫', 1290 => '⏬', 1291 => '⏭', 1292 => '⏮', 1293 => '⏯', 1294 => '⏰', 1295 => '⏱', 1296 => '⏲', 1297 => '⏳', 1298 => '␀', 1299 => '␁', 1300 => '␂', 1301 => '␃', 1302 => '␄', 1303 => '␅', 1304 => '␆', 1305 => '␇', 1306 => '␈', 1307 => '␉', 1308 => '␊', 1309 => '␋', 1310 => '␌', 1311 => '␍', 1312 => '␎', 1313 => '␏', 1314 => '␐', 1315 => '␑', 1316 => '␒', 1317 => '␓', 1318 => '␔', 1319 => '␕', 1320 => '␖', 1321 => '␗', 1322 => '␘', 1323 => '␙', 1324 => '␚', 1325 => '␛', 1326 => '␜', 1327 => '␝', 1328 => '␞', 1329 => '␟', 1330 => '␠', 1331 => '␡', 1332 => '␢', 1333 
=> '␣', 1334 => '␤', 1335 => '␥', 1336 => '␦', 1337 => '⑀', 1338 => '⑁', 1339 => '⑂', 1340 => '⑃', 1341 => '⑄', 1342 => '⑅', 1343 => '⑆', 1344 => '⑇', 1345 => '⑈', 1346 => '⑉', 1347 => '⑊', 1348 => '─', 1349 => '━', 1350 => '│', 1351 => '┃', 1352 => '┄', 1353 => '┅', 1354 => '┆', 1355 => '┇', 1356 => '┈', 1357 => '┉', 1358 => '┊', 1359 => '┋', 1360 => '┌', 1361 => '┍', 1362 => '┎', 1363 => '┏', 1364 => '┐', 1365 => '┑', 1366 => '┒', 1367 => '┓', 1368 => '└', 1369 => '┕', 1370 => '┖', 1371 => '┗', 1372 => '┘', 1373 => '┙', 1374 => '┚', 1375 => '┛', 1376 => '├', 1377 => '┝', 1378 => '┞', 1379 => '┟', 1380 => '┠', 1381 => '┡', 1382 => '┢', 1383 => '┣', 1384 => '┤', 1385 => '┥', 1386 => '┦', 1387 => '┧', 1388 => '┨', 1389 => '┩', 1390 => '┪', 1391 => '┫', 1392 => '┬', 1393 => '┭', 1394 => '┮', 1395 => '┯', 1396 => '┰', 1397 => '┱', 1398 => '┲', 1399 => '┳', 1400 => '┴', 1401 => '┵', 1402 => '┶', 1403 => '┷', 1404 => '┸', 1405 => '┹', 1406 => '┺', 1407 => '┻', 1408 => '┼', 1409 => '┽', 1410 => '┾', 1411 => '┿', 1412 => '╀', 1413 => '╁', 1414 => '╂', 1415 => '╃', 1416 => '╄', 1417 => '╅', 1418 => '╆', 1419 => '╇', 1420 => '╈', 1421 => '╉', 1422 => '╊', 1423 => '╋', 1424 => '╌', 1425 => '╍', 1426 => '╎', 1427 => '╏', 1428 => '═', 1429 => '║', 1430 => '╒', 1431 => '╓', 1432 => '╔', 1433 => '╕', 1434 => '╖', 1435 => '╗', 1436 => '╘', 1437 => '╙', 1438 => '╚', 1439 => '╛', 1440 => '╜', 1441 => '╝', 1442 => '╞', 1443 => '╟', 1444 => '╠', 1445 => '╡', 1446 => '╢', 1447 => '╣', 1448 => '╤', 1449 => '╥', 1450 => '╦', 1451 => '╧', 1452 => '╨', 1453 => '╩', 1454 => '╪', 1455 => '╫', 1456 => '╬', 1457 => '╭', 1458 => '╮', 1459 => '╯', 1460 => '╰', 1461 => '╱', 1462 => '╲', 1463 => '╳', 1464 => '╴', 1465 => '╵', 1466 => '╶', 1467 => '╷', 1468 => '╸', 1469 => '╹', 1470 => '╺', 1471 => '╻', 1472 => '╼', 1473 => '╽', 1474 => '╾', 1475 => '╿', 1476 => '▀', 1477 => '▁', 1478 => '▂', 1479 => '▃', 1480 => '▄', 1481 => '▅', 1482 => '▆', 1483 => '▇', 1484 => '█', 1485 => '▉', 1486 => '▊', 
1487 => '▋', 1488 => '▌', 1489 => '▍', 1490 => '▎', 1491 => '▏', 1492 => '▐', 1493 => '░', 1494 => '▒', 1495 => '▓', 1496 => '▔', 1497 => '▕', 1498 => '▖', 1499 => '▗', 1500 => '▘', 1501 => '▙', 1502 => '▚', 1503 => '▛', 1504 => '▜', 1505 => '▝', 1506 => '▞', 1507 => '▟', 1508 => '■', 1509 => '□', 1510 => '▢', 1511 => '▣', 1512 => '▤', 1513 => '▥', 1514 => '▦', 1515 => '▧', 1516 => '▨', 1517 => '▩', 1518 => '▪', 1519 => '▫', 1520 => '▬', 1521 => '▭', 1522 => '▮', 1523 => '▯', 1524 => '▰', 1525 => '▱', 1526 => '▲', 1527 => '△', 1528 => '▴', 1529 => '▵', 1530 => '▶', 1531 => '▷', 1532 => '▸', 1533 => '▹', 1534 => '►', 1535 => '▻', 1536 => '▼', 1537 => '▽', 1538 => '▾', 1539 => '▿', 1540 => '◀', 1541 => '◁', 1542 => '◂', 1543 => '◃', 1544 => '◄', 1545 => '◅', 1546 => '◆', 1547 => '◇', 1548 => '◈', 1549 => '◉', 1550 => '◊', 1551 => '○', 1552 => '◌', 1553 => '◍', 1554 => '◎', 1555 => '●', 1556 => '◐', 1557 => '◑', 1558 => '◒', 1559 => '◓', 1560 => '◔', 1561 => '◕', 1562 => '◖', 1563 => '◗', 1564 => '◘', 1565 => '◙', 1566 => '◚', 1567 => '◛', 1568 => '◜', 1569 => '◝', 1570 => '◞', 1571 => '◟', 1572 => '◠', 1573 => '◡', 1574 => '◢', 1575 => '◣', 1576 => '◤', 1577 => '◥', 1578 => '◦', 1579 => '◧', 1580 => '◨', 1581 => '◩', 1582 => '◪', 1583 => '◫', 1584 => '◬', 1585 => '◭', 1586 => '◮', 1587 => '◯', 1588 => '◰', 1589 => '◱', 1590 => '◲', 1591 => '◳', 1592 => '◴', 1593 => '◵', 1594 => '◶', 1595 => '◷', 1596 => '◸', 1597 => '◹', 1598 => '◺', 1599 => '◻', 1600 => '◼', 1601 => '◽', 1602 => '◾', 1603 => '◿', 1604 => '☀', 1605 => '☁', 1606 => '☂', 1607 => '☃', 1608 => '☄', 1609 => '★', 1610 => '☆', 1611 => '☇', 1612 => '☈', 1613 => '☉', 1614 => '☊', 1615 => '☋', 1616 => '☌', 1617 => '☍', 1618 => '☎', 1619 => '☏', 1620 => '☐', 1621 => '☑', 1622 => '☒', 1623 => '☓', 1624 => '☔', 1625 => '☕', 1626 => '☖', 1627 => '☗', 1628 => '☘', 1629 => '☙', 1630 => '☚', 1631 => '☛', 1632 => '☜', 1633 => '☝', 1634 => '☞', 1635 => '☟', 1636 => '☠', 1637 => '☡', 1638 => '☢', 1639 => '☣', 1640 => 
'☤', 1641 => '☥', 1642 => '☦', 1643 => '☧', 1644 => '☨', 1645 => '☩', 1646 => '☪', 1647 => '☫', 1648 => '☬', 1649 => '☭', 1650 => '☮', 1651 => '☯', 1652 => '☸', 1653 => '☹', 1654 => '☺', 1655 => '☻', 1656 => '☼', 1657 => '☽', 1658 => '☾', 1659 => '☿', 1660 => '♀', 1661 => '♁', 1662 => '♂', 1663 => '♃', 1664 => '♄', 1665 => '♅', 1666 => '♆', 1667 => '♇', 1668 => '♈', 1669 => '♉', 1670 => '♊', 1671 => '♋', 1672 => '♌', 1673 => '♍', 1674 => '♎', 1675 => '♏', 1676 => '♐', 1677 => '♑', 1678 => '♒', 1679 => '♓', 1680 => '♔', 1681 => '♕', 1682 => '♖', 1683 => '♗', 1684 => '♘', 1685 => '♙', 1686 => '♚', 1687 => '♛', 1688 => '♜', 1689 => '♝', 1690 => '♞', 1691 => '♟', 1692 => '♠', 1693 => '♡', 1694 => '♢', 1695 => '♣', 1696 => '♤', 1697 => '♥', 1698 => '♦', 1699 => '♧', 1700 => '♨', 1701 => '♩', 1702 => '♪', 1703 => '♫', 1704 => '♬', 1705 => '♰', 1706 => '♱', 1707 => '♲', 1708 => '♳', 1709 => '♴', 1710 => '♵', 1711 => '♶', 1712 => '♷', 1713 => '♸', 1714 => '♹', 1715 => '♺', 1716 => '♻', 1717 => '♼', 1718 => '♽', 1719 => '♾', 1720 => '♿', 1721 => '⚀', 1722 => '⚁', 1723 => '⚂', 1724 => '⚃', 1725 => '⚄', 1726 => '⚅', 1727 => '⚆', 1728 => '⚇', 1729 => '⚈', 1730 => '⚉', 1731 => '⚐', 1732 => '⚑', 1733 => '⚒', 1734 => '⚓', 1735 => '⚔', 1736 => '⚕', 1737 => '⚖', 1738 => '⚗', 1739 => '⚘', 1740 => '⚙', 1741 => '⚚', 1742 => '⚛', 1743 => '⚜', 1744 => '⚝', 1745 => '⚞', 1746 => '⚟', 1747 => '⚠', 1748 => '⚡', 1749 => '⚢', 1750 => '⚣', 1751 => '⚤', 1752 => '⚥', 1753 => '⚦', 1754 => '⚧', 1755 => '⚨', 1756 => '⚩', 1757 => '⚪', 1758 => '⚫', 1759 => '⚬', 1760 => '⚭', 1761 => '⚮', 1762 => '⚯', 1763 => '⚰', 1764 => '⚱', 1765 => '⚲', 1766 => '⚳', 1767 => '⚴', 1768 => '⚵', 1769 => '⚶', 1770 => '⚷', 1771 => '⚸', 1772 => '⚹', 1773 => '⚺', 1774 => '⚻', 1775 => '⚼', 1776 => '⚽', 1777 => '⚾', 1778 => '⚿', 1779 => '⛀', 1780 => '⛁', 1781 => '⛂', 1782 => '⛃', 1783 => '⛄', 1784 => '⛅', 1785 => '⛆', 1786 => '⛇', 1787 => '⛈', 1788 => '⛉', 1789 => '⛊', 1790 => '⛋', 1791 => '⛌', 1792 => '⛍', 1793 => '⛎', 1794 
=> '⛏', 1795 => '⛐', 1796 => '⛑', 1797 => '⛒', 1798 => '⛓', 1799 => '⛔', 1800 => '⛕', 1801 => '⛖', 1802 => '⛗', 1803 => '⛘', 1804 => '⛙', 1805 => '⛚', 1806 => '⛛', 1807 => '⛜', 1808 => '⛝', 1809 => '⛞', 1810 => '⛟', 1811 => '⛠', 1812 => '⛡', 1813 => '⛢', 1814 => '⛣', 1815 => '⛤', 1816 => '⛥', 1817 => '⛦', 1818 => '⛧', 1819 => '⛨', 1820 => '⛩', 1821 => '⛪', 1822 => '⛫', 1823 => '⛬', 1824 => '⛭', 1825 => '⛮', 1826 => '⛯', 1827 => '⛰', 1828 => '⛱', 1829 => '⛲', 1830 => '⛳', 1831 => '⛴', 1832 => '⛵', 1833 => '⛶', 1834 => '⛷', 1835 => '⛸', 1836 => '⛹', 1837 => '⛺', 1838 => '⛻', 1839 => '⛼', 1840 => '⛽', 1841 => '⛾', 1842 => '⛿', 1843 => '✁', 1844 => '✂', 1845 => '✃', 1846 => '✄', 1847 => '✅', 1848 => '✆', 1849 => '✇', 1850 => '✈', 1851 => '✉', 1852 => '✊', 1853 => '✋', 1854 => '✌', 1855 => '✍', 1856 => '✎', 1857 => '✏', 1858 => '✐', 1859 => '✑', 1860 => '✒', 1861 => '✓', 1862 => '✔', 1863 => '✕', 1864 => '✖', 1865 => '✗', 1866 => '✘', 1867 => '✙', 1868 => '✚', 1869 => '✛', 1870 => '✜', 1871 => '✝', 1872 => '✞', 1873 => '✟', 1874 => '✠', 1875 => '✡', 1876 => '✢', 1877 => '✣', 1878 => '✤', 1879 => '✥', 1880 => '✦', 1881 => '✧', 1882 => '✨', 1883 => '✩', 1884 => '✪', 1885 => '✫', 1886 => '✬', 1887 => '✭', 1888 => '✮', 1889 => '✯', 1890 => '✰', 1891 => '✱', 1892 => '✲', 1893 => '✳', 1894 => '✴', 1895 => '✵', 1896 => '✶', 1897 => '✷', 1898 => '✸', 1899 => '✹', 1900 => '✺', 1901 => '✻', 1902 => '✼', 1903 => '✽', 1904 => '✾', 1905 => '✿', 1906 => '❀', 1907 => '❁', 1908 => '❂', 1909 => '❃', 1910 => '❄', 1911 => '❅', 1912 => '❆', 1913 => '❇', 1914 => '❈', 1915 => '❉', 1916 => '❊', 1917 => '❋', 1918 => '❌', 1919 => '❍', 1920 => '❎', 1921 => '❏', 1922 => '❐', 1923 => '❑', 1924 => '❒', 1925 => '❓', 1926 => '❔', 1927 => '❕', 1928 => '❖', 1929 => '❗', 1930 => '❘', 1931 => '❙', 1932 => '❚', 1933 => '❛', 1934 => '❜', 1935 => '❝', 1936 => '❞', 1937 => '❟', 1938 => '❠', 1939 => '❡', 1940 => '❢', 1941 => '❣', 1942 => '❤', 1943 => '❥', 1944 => '❦', 1945 => '❧', 1946 => '❨', 1947 => '❩', 
1948 => '❪', 1949 => '❫', 1950 => '❬', 1951 => '❭', 1952 => '❮', 1953 => '❯', 1954 => '❰', 1955 => '❱', 1956 => '❲', 1957 => '❳', 1958 => '❴', 1959 => '❵', 1960 => '➔', 1961 => '➕', 1962 => '➖', 1963 => '➗', 1964 => '➘', 1965 => '➙', 1966 => '➚', 1967 => '➛', 1968 => '➜', 1969 => '➝', 1970 => '➞', 1971 => '➟', 1972 => '➠', 1973 => '➡', 1974 => '➢', 1975 => '➣', 1976 => '➤', 1977 => '➥', 1978 => '➦', 1979 => '➧', 1980 => '➨', 1981 => '➩', 1982 => '➪', 1983 => '➫', 1984 => '➬', 1985 => '➭', 1986 => '➮', 1987 => '➯', 1988 => '➰', 1989 => '➱', 1990 => '➲', 1991 => '➳', 1992 => '➴', 1993 => '➵', 1994 => '➶', 1995 => '➷', 1996 => '➸', 1997 => '➹', 1998 => '➺', 1999 => '➻', 2000 => '➼', 2001 => '➽', 2002 => '➾', 2003 => '➿', 2004 => '⟀', 2005 => '⟁', 2006 => '⟂', 2007 => '⟃', 2008 => '⟄', 2009 => '⟅', 2010 => '⟆', 2011 => '⟇', 2012 => '⟈', 2013 => '⟉', 2014 => '⟊', 2015 => '⟌', 2016 => '⟎', 2017 => '⟏', 2018 => '⟐', 2019 => '⟑', 2020 => '⟒', 2021 => '⟓', 2022 => '⟔', 2023 => '⟕', 2024 => '⟖', 2025 => '⟗', 2026 => '⟘', 2027 => '⟙', 2028 => '⟚', 2029 => '⟛', 2030 => '⟜', 2031 => '⟝', 2032 => '⟞', 2033 => '⟟', 2034 => '⟠', 2035 => '⟡', 2036 => '⟢', 2037 => '⟣', 2038 => '⟤', 2039 => '⟥', 2040 => '⟦', 2041 => '⟧', 2042 => '⟨', 2043 => '⟩', 2044 => '⟪', 2045 => '⟫', 2046 => '⟰', 2047 => '⟱', 2048 => '⟲', 2049 => '⟳', 2050 => '⟴', 2051 => '⟵', 2052 => '⟶', 2053 => '⟷', 2054 => '⟸', 2055 => '⟹', 2056 => '⟺', 2057 => '⟻', 2058 => '⟼', 2059 => '⟽', 2060 => '⟾', 2061 => '⟿', 2062 => '⤀', 2063 => '⤁', 2064 => '⤂', 2065 => '⤃', 2066 => '⤄', 2067 => '⤅', 2068 => '⤆', 2069 => '⤇', 2070 => '⤈', 2071 => '⤉', 2072 => '⤊', 2073 => '⤋', 2074 => '⤌', 2075 => '⤍', 2076 => '⤎', 2077 => '⤏', 2078 => '⤐', 2079 => '⤑', 2080 => '⤒', 2081 => '⤓', 2082 => '⤔', 2083 => '⤕', 2084 => '⤖', 2085 => '⤗', 2086 => '⤘', 2087 => '⤙', 2088 => '⤚', 2089 => '⤛', 2090 => '⤜', 2091 => '⤝', 2092 => '⤞', 2093 => '⤟', 2094 => '⤠', 2095 => '⤡', 2096 => '⤢', 2097 => '⤣', 2098 => '⤤', 2099 => '⤥', 2100 => '⤦', 2101 => 
'⤧', 2102 => '⤨', 2103 => '⤩', 2104 => '⤪', 2105 => '⤫', 2106 => '⤬', 2107 => '⤭', 2108 => '⤮', 2109 => '⤯', 2110 => '⤰', 2111 => '⤱', 2112 => '⤲', 2113 => '⤳', 2114 => '⤴', 2115 => '⤵', 2116 => '⤶', 2117 => '⤷', 2118 => '⤸', 2119 => '⤹', 2120 => '⤺', 2121 => '⤻', 2122 => '⤼', 2123 => '⤽', 2124 => '⤾', 2125 => '⤿', 2126 => '⥀', 2127 => '⥁', 2128 => '⥂', 2129 => '⥃', 2130 => '⥄', 2131 => '⥅', 2132 => '⥆', 2133 => '⥇', 2134 => '⥈', 2135 => '⥉', 2136 => '⥊', 2137 => '⥋', 2138 => '⥌', 2139 => '⥍', 2140 => '⥎', 2141 => '⥏', 2142 => '⥐', 2143 => '⥑', 2144 => '⥒', 2145 => '⥓', 2146 => '⥔', 2147 => '⥕', 2148 => '⥖', 2149 => '⥗', 2150 => '⥘', 2151 => '⥙', 2152 => '⥚', 2153 => '⥛', 2154 => '⥜', 2155 => '⥝', 2156 => '⥞', 2157 => '⥟', 2158 => '⥠', 2159 => '⥡', 2160 => '⥢', 2161 => '⥣', 2162 => '⥤', 2163 => '⥥', 2164 => '⥦', 2165 => '⥧', 2166 => '⥨', 2167 => '⥩', 2168 => '⥪', 2169 => '⥫', 2170 => '⥬', 2171 => '⥭', 2172 => '⥮', 2173 => '⥯', 2174 => '⥰', 2175 => '⥱', 2176 => '⥲', 2177 => '⥳', 2178 => '⥴', 2179 => '⥵', 2180 => '⥶', 2181 => '⥷', 2182 => '⥸', 2183 => '⥹', 2184 => '⥺', 2185 => '⥻', 2186 => '⥼', 2187 => '⥽', 2188 => '⥾', 2189 => '⥿', 2190 => '⦀', 2191 => '⦁', 2192 => '⦂', 2193 => '⦙', 2194 => '⦚', 2195 => '⦛', 2196 => '⦜', 2197 => '⦝', 2198 => '⦞', 2199 => '⦟', 2200 => '⦠', 2201 => '⦡', 2202 => '⦢', 2203 => '⦣', 2204 => '⦤', 2205 => '⦥', 2206 => '⦦', 2207 => '⦧', 2208 => '⦨', 2209 => '⦩', 2210 => '⦪', 2211 => '⦫', 2212 => '⦬', 2213 => '⦭', 2214 => '⦮', 2215 => '⦯', 2216 => '⦰', 2217 => '⦱', 2218 => '⦲', 2219 => '⦳', 2220 => '⦴', 2221 => '⦵', 2222 => '⦶', 2223 => '⦷', 2224 => '⦸', 2225 => '⦹', 2226 => '⦺', 2227 => '⦻', 2228 => '⦼', 2229 => '⦽', 2230 => '⦾', 2231 => '⦿', 2232 => '⧀', 2233 => '⧁', 2234 => '⧂', 2235 => '⧃', 2236 => '⧄', 2237 => '⧅', 2238 => '⧆', 2239 => '⧇', 2240 => '⧈', 2241 => '⧉', 2242 => '⧊', 2243 => '⧋', 2244 => '⧌', 2245 => '⧍', 2246 => '⧎', 2247 => '⧏', 2248 => '⧐', 2249 => '⧑', 2250 => '⧒', 2251 => '⧓', 2252 => '⧔', 2253 => '⧕', 2254 => '⧖', 2255 
=> '⧗', 2256 => '⧘', 2257 => '⧙', 2258 => '⧚', 2259 => '⧛', 2260 => '⧜', 2261 => '⧝', 2262 => '⧞', 2263 => '⧟', 2264 => '⧠', 2265 => '⧡', 2266 => '⧢', 2267 => '⧣', 2268 => '⧤', 2269 => '⧥', 2270 => '⧦', 2271 => '⧧', 2272 => '⧨', 2273 => '⧩', 2274 => '⧪', 2275 => '⧫', 2276 => '⧬', 2277 => '⧭', 2278 => '⧮', 2279 => '⧯', 2280 => '⧰', 2281 => '⧱', 2282 => '⧲', 2283 => '⧳', 2284 => '⧴', 2285 => '⧵', 2286 => '⧶', 2287 => '⧷', 2288 => '⧸', 2289 => '⧹', 2290 => '⧺', 2291 => '⧻', 2292 => '⧾', 2293 => '⧿', 2294 => '⨀', 2295 => '⨁', 2296 => '⨂', 2297 => '⨃', 2298 => '⨄', 2299 => '⨅', 2300 => '⨆', 2301 => '⨇', 2302 => '⨈', 2303 => '⨉', 2304 => '⨊', 2305 => '⨋', 2306 => '⨍', 2307 => '⨎', 2308 => '⨏', 2309 => '⨐', 2310 => '⨑', 2311 => '⨒', 2312 => '⨓', 2313 => '⨔', 2314 => '⨕', 2315 => '⨖', 2316 => '⨗', 2317 => '⨘', 2318 => '⨙', 2319 => '⨚', 2320 => '⨛', 2321 => '⨜', 2322 => '⨝', 2323 => '⨞', 2324 => '⨟', 2325 => '⨠', 2326 => '⨡', 2327 => '⨢', 2328 => '⨣', 2329 => '⨤', 2330 => '⨥', 2331 => '⨦', 2332 => '⨧', 2333 => '⨨', 2334 => '⨩', 2335 => '⨪', 2336 => '⨫', 2337 => '⨬', 2338 => '⨭', 2339 => '⨮', 2340 => '⨯', 2341 => '⨰', 2342 => '⨱', 2343 => '⨲', 2344 => '⨳', 2345 => '⨴', 2346 => '⨵', 2347 => '⨶', 2348 => '⨷', 2349 => '⨸', 2350 => '⨹', 2351 => '⨺', 2352 => '⨻', 2353 => '⨼', 2354 => '⨽', 2355 => '⨾', 2356 => '⨿', 2357 => '⩀', 2358 => '⩁', 2359 => '⩂', 2360 => '⩃', 2361 => '⩄', 2362 => '⩅', 2363 => '⩆', 2364 => '⩇', 2365 => '⩈', 2366 => '⩉', 2367 => '⩊', 2368 => '⩋', 2369 => '⩌', 2370 => '⩍', 2371 => '⩎', 2372 => '⩏', 2373 => '⩐', 2374 => '⩑', 2375 => '⩒', 2376 => '⩓', 2377 => '⩔', 2378 => '⩕', 2379 => '⩖', 2380 => '⩗', 2381 => '⩘', 2382 => '⩙', 2383 => '⩚', 2384 => '⩛', 2385 => '⩜', 2386 => '⩝', 2387 => '⩞', 2388 => '⩟', 2389 => '⩠', 2390 => '⩡', 2391 => '⩢', 2392 => '⩣', 2393 => '⩤', 2394 => '⩥', 2395 => '⩦', 2396 => '⩧', 2397 => '⩨', 2398 => '⩩', 2399 => '⩪', 2400 => '⩫', 2401 => '⩬', 2402 => '⩭', 2403 => '⩮', 2404 => '⩯', 2405 => '⩰', 2406 => '⩱', 2407 => '⩲', 2408 => '⩳', 
2409 => '⩷', 2410 => '⩸', 2411 => '⩹', 2412 => '⩺', 2413 => '⩻', 2414 => '⩼', 2415 => '⩽', 2416 => '⩾', 2417 => '⩿', 2418 => '⪀', 2419 => '⪁', 2420 => '⪂', 2421 => '⪃', 2422 => '⪄', 2423 => '⪅', 2424 => '⪆', 2425 => '⪇', 2426 => '⪈', 2427 => '⪉', 2428 => '⪊', 2429 => '⪋', 2430 => '⪌', 2431 => '⪍', 2432 => '⪎', 2433 => '⪏', 2434 => '⪐', 2435 => '⪑', 2436 => '⪒', 2437 => '⪓', 2438 => '⪔', 2439 => '⪕', 2440 => '⪖', 2441 => '⪗', 2442 => '⪘', 2443 => '⪙', 2444 => '⪚', 2445 => '⪛', 2446 => '⪜', 2447 => '⪝', 2448 => '⪞', 2449 => '⪟', 2450 => '⪠', 2451 => '⪡', 2452 => '⪢', 2453 => '⪣', 2454 => '⪤', 2455 => '⪥', 2456 => '⪦', 2457 => '⪧', 2458 => '⪨', 2459 => '⪩', 2460 => '⪪', 2461 => '⪫', 2462 => '⪬', 2463 => '⪭', 2464 => '⪮', 2465 => '⪯', 2466 => '⪰', 2467 => '⪱', 2468 => '⪲', 2469 => '⪳', 2470 => '⪴', 2471 => '⪵', 2472 => '⪶', 2473 => '⪷', 2474 => '⪸', 2475 => '⪹', 2476 => '⪺', 2477 => '⪻', 2478 => '⪼', 2479 => '⪽', 2480 => '⪾', 2481 => '⪿', 2482 => '⫀', 2483 => '⫁', 2484 => '⫂', 2485 => '⫃', 2486 => '⫄', 2487 => '⫅', 2488 => '⫆', 2489 => '⫇', 2490 => '⫈', 2491 => '⫉', 2492 => '⫊', 2493 => '⫋', 2494 => '⫌', 2495 => '⫍', 2496 => '⫎', 2497 => '⫏', 2498 => '⫐', 2499 => '⫑', 2500 => '⫒', 2501 => '⫓', 2502 => '⫔', 2503 => '⫕', 2504 => '⫖', 2505 => '⫗', 2506 => '⫘', 2507 => '⫙', 2508 => '⫚', 2509 => '⫛', 2510 => '⫝', 2511 => '⫞', 2512 => '⫟', 2513 => '⫠', 2514 => '⫡', 2515 => '⫢', 2516 => '⫣', 2517 => '⫤', 2518 => '⫥', 2519 => '⫦', 2520 => '⫧', 2521 => '⫨', 2522 => '⫩', 2523 => '⫪', 2524 => '⫫', 2525 => '⫬', 2526 => '⫭', 2527 => '⫮', 2528 => '⫯', 2529 => '⫰', 2530 => '⫱', 2531 => '⫲', 2532 => '⫳', 2533 => '⫴', 2534 => '⫵', 2535 => '⫶', 2536 => '⫷', 2537 => '⫸', 2538 => '⫹', 2539 => '⫺', 2540 => '⫻', 2541 => '⫼', 2542 => '⫽', 2543 => '⫾', 2544 => '⫿', 2545 => '⬀', 2546 => '⬁', 2547 => '⬂', 2548 => '⬃', 2549 => '⬄', 2550 => '⬅', 2551 => '⬆', 2552 => '⬇', 2553 => '⬈', 2554 => '⬉', 2555 => '⬊', 2556 => '⬋', 2557 => '⬌', 2558 => '⬍', 2559 => '⬎', 2560 => '⬏', 2561 => '⬐', 2562 => 
'⬑', 2563 => '⬒', 2564 => '⬓', 2565 => '⬔', 2566 => '⬕', 2567 => '⬖', 2568 => '⬗', 2569 => '⬘', 2570 => '⬙', 2571 => '⬚', 2572 => '⬛', 2573 => '⬜', 2574 => '⬝', 2575 => '⬞', 2576 => '⬟', 2577 => '⬠', 2578 => '⬡', 2579 => '⬢', 2580 => '⬣', 2581 => '⬤', 2582 => '⬥', 2583 => '⬦', 2584 => '⬧', 2585 => '⬨', 2586 => '⬩', 2587 => '⬪', 2588 => '⬫', 2589 => '⬬', 2590 => '⬭', 2591 => '⬮', 2592 => '⬯', 2593 => '⬰', 2594 => '⬱', 2595 => '⬲', 2596 => '⬳', 2597 => '⬴', 2598 => '⬵', 2599 => '⬶', 2600 => '⬷', 2601 => '⬸', 2602 => '⬹', 2603 => '⬺', 2604 => '⬻', 2605 => '⬼', 2606 => '⬽', 2607 => '⬾', 2608 => '⬿', 2609 => '⭀', 2610 => '⭁', 2611 => '⭂', 2612 => '⭃', 2613 => '⭄', 2614 => '⭅', 2615 => '⭆', 2616 => '⭇', 2617 => '⭈', 2618 => '⭉', 2619 => '⭊', 2620 => '⭋', 2621 => '⭌', 2622 => '⭐', 2623 => '⭑', 2624 => '⭒', 2625 => '⭓', 2626 => '⭔', 2627 => '⭕', 2628 => '⭖', 2629 => '⭗', 2630 => '⭘', 2631 => '⭙', 2632 => '⳥', 2633 => '⳦', 2634 => '⳧', 2635 => '⳨', 2636 => '⳩', 2637 => '⳪', 2638 => '⠀', 2639 => '⠁', 2640 => '⠂', 2641 => '⠃', 2642 => '⠄', 2643 => '⠅', 2644 => '⠆', 2645 => '⠇', 2646 => '⠈', 2647 => '⠉', 2648 => '⠊', 2649 => '⠋', 2650 => '⠌', 2651 => '⠍', 2652 => '⠎', 2653 => '⠏', 2654 => '⠐', 2655 => '⠑', 2656 => '⠒', 2657 => '⠓', 2658 => '⠔', 2659 => '⠕', 2660 => '⠖', 2661 => '⠗', 2662 => '⠘', 2663 => '⠙', 2664 => '⠚', 2665 => '⠛', 2666 => '⠜', 2667 => '⠝', 2668 => '⠞', 2669 => '⠟', 2670 => '⠠', 2671 => '⠡', 2672 => '⠢', 2673 => '⠣', 2674 => '⠤', 2675 => '⠥', 2676 => '⠦', 2677 => '⠧', 2678 => '⠨', 2679 => '⠩', 2680 => '⠪', 2681 => '⠫', 2682 => '⠬', 2683 => '⠭', 2684 => '⠮', 2685 => '⠯', 2686 => '⠰', 2687 => '⠱', 2688 => '⠲', 2689 => '⠳', 2690 => '⠴', 2691 => '⠵', 2692 => '⠶', 2693 => '⠷', 2694 => '⠸', 2695 => '⠹', 2696 => '⠺', 2697 => '⠻', 2698 => '⠼', 2699 => '⠽', 2700 => '⠾', 2701 => '⠿', 2702 => '⡀', 2703 => '⡁', 2704 => '⡂', 2705 => '⡃', 2706 => '⡄', 2707 => '⡅', 2708 => '⡆', 2709 => '⡇', 2710 => '⡈', 2711 => '⡉', 2712 => '⡊', 2713 => '⡋', 2714 => '⡌', 2715 => '⡍', 2716 
=> '⡎', 2717 => '⡏', 2718 => '⡐', 2719 => '⡑', 2720 => '⡒', 2721 => '⡓', 2722 => '⡔', 2723 => '⡕', 2724 => '⡖', 2725 => '⡗', 2726 => '⡘', 2727 => '⡙', 2728 => '⡚', 2729 => '⡛', 2730 => '⡜', 2731 => '⡝', 2732 => '⡞', 2733 => '⡟', 2734 => '⡠', 2735 => '⡡', 2736 => '⡢', 2737 => '⡣', 2738 => '⡤', 2739 => '⡥', 2740 => '⡦', 2741 => '⡧', 2742 => '⡨', 2743 => '⡩', 2744 => '⡪', 2745 => '⡫', 2746 => '⡬', 2747 => '⡭', 2748 => '⡮', 2749 => '⡯', 2750 => '⡰', 2751 => '⡱', 2752 => '⡲', 2753 => '⡳', 2754 => '⡴', 2755 => '⡵', 2756 => '⡶', 2757 => '⡷', 2758 => '⡸', 2759 => '⡹', 2760 => '⡺', 2761 => '⡻', 2762 => '⡼', 2763 => '⡽', 2764 => '⡾', 2765 => '⡿', 2766 => '⢀', 2767 => '⢁', 2768 => '⢂', 2769 => '⢃', 2770 => '⢄', 2771 => '⢅', 2772 => '⢆', 2773 => '⢇', 2774 => '⢈', 2775 => '⢉', 2776 => '⢊', 2777 => '⢋', 2778 => '⢌', 2779 => '⢍', 2780 => '⢎', 2781 => '⢏', 2782 => '⢐', 2783 => '⢑', 2784 => '⢒', 2785 => '⢓', 2786 => '⢔', 2787 => '⢕', 2788 => '⢖', 2789 => '⢗', 2790 => '⢘', 2791 => '⢙', 2792 => '⢚', 2793 => '⢛', 2794 => '⢜', 2795 => '⢝', 2796 => '⢞', 2797 => '⢟', 2798 => '⢠', 2799 => '⢡', 2800 => '⢢', 2801 => '⢣', 2802 => '⢤', 2803 => '⢥', 2804 => '⢦', 2805 => '⢧', 2806 => '⢨', 2807 => '⢩', 2808 => '⢪', 2809 => '⢫', 2810 => '⢬', 2811 => '⢭', 2812 => '⢮', 2813 => '⢯', 2814 => '⢰', 2815 => '⢱', 2816 => '⢲', 2817 => '⢳', 2818 => '⢴', 2819 => '⢵', 2820 => '⢶', 2821 => '⢷', 2822 => '⢸', 2823 => '⢹', 2824 => '⢺', 2825 => '⢻', 2826 => '⢼', 2827 => '⢽', 2828 => '⢾', 2829 => '⢿', 2830 => '⣀', 2831 => '⣁', 2832 => '⣂', 2833 => '⣃', 2834 => '⣄', 2835 => '⣅', 2836 => '⣆', 2837 => '⣇', 2838 => '⣈', 2839 => '⣉', 2840 => '⣊', 2841 => '⣋', 2842 => '⣌', 2843 => '⣍', 2844 => '⣎', 2845 => '⣏', 2846 => '⣐', 2847 => '⣑', 2848 => '⣒', 2849 => '⣓', 2850 => '⣔', 2851 => '⣕', 2852 => '⣖', 2853 => '⣗', 2854 => '⣘', 2855 => '⣙', 2856 => '⣚', 2857 => '⣛', 2858 => '⣜', 2859 => '⣝', 2860 => '⣞', 2861 => '⣟', 2862 => '⣠', 2863 => '⣡', 2864 => '⣢', 2865 => '⣣', 2866 => '⣤', 2867 => '⣥', 2868 => '⣦', 2869 => '⣧', 
2870 => '⣨', 2871 => '⣩', 2872 => '⣪', 2873 => '⣫', 2874 => '⣬', 2875 => '⣭', 2876 => '⣮', 2877 => '⣯', 2878 => '⣰', 2879 => '⣱', 2880 => '⣲', 2881 => '⣳', 2882 => '⣴', 2883 => '⣵', 2884 => '⣶', 2885 => '⣷', 2886 => '⣸', 2887 => '⣹', 2888 => '⣺', 2889 => '⣻', 2890 => '⣼', 2891 => '⣽', 2892 => '⣾', 2893 => '⣿', 2894 => '⚊', 2895 => '⚋', 2896 => '⚌', 2897 => '⚍', 2898 => '⚎', 2899 => '⚏', 2900 => '☰', 2901 => '☱', 2902 => '☲', 2903 => '☳', 2904 => '☴', 2905 => '☵', 2906 => '☶', 2907 => '☷', 2908 => '䷀', 2909 => '䷁', 2910 => '䷂', 2911 => '䷃', 2912 => '䷄', 2913 => '䷅', 2914 => '䷆', 2915 => '䷇', 2916 => '䷈', 2917 => '䷉', 2918 => '䷊', 2919 => '䷋', 2920 => '䷌', 2921 => '䷍', 2922 => '䷎', 2923 => '䷏', 2924 => '䷐', 2925 => '䷑', 2926 => '䷒', 2927 => '䷓', 2928 => '䷔', 2929 => '䷕', 2930 => '䷖', 2931 => '䷗', 2932 => '䷘', 2933 => '䷙', 2934 => '䷚', 2935 => '䷛', 2936 => '䷜', 2937 => '䷝', 2938 => '䷞', 2939 => '䷟', 2940 => '䷠', 2941 => '䷡', 2942 => '䷢', 2943 => '䷣', 2944 => '䷤', 2945 => '䷥', 2946 => '䷦', 2947 => '䷧', 2948 => '䷨', 2949 => '䷩', 2950 => '䷪', 2951 => '䷫', 2952 => '䷬', 2953 => '䷭', 2954 => '䷮', 2955 => '䷯', 2956 => '䷰', 2957 => '䷱', 2958 => '䷲', 2959 => '䷳', 2960 => '䷴', 2961 => '䷵', 2962 => '䷶', 2963 => '䷷', 2964 => '䷸', 2965 => '䷹', 2966 => '䷺', 2967 => '䷻', 2968 => '䷼', 2969 => '䷽', 2970 => '䷾', 2971 => '䷿', 2972 => '𝌀', 2973 => '𝌁', 2974 => '𝌂', 2975 => '𝌃', 2976 => '𝌄', 2977 => '𝌅', 2978 => '𝌆', 2979 => '𝌇', 2980 => '𝌈', 2981 => '𝌉', 2982 => '𝌊', 2983 => '𝌋', 2984 => '𝌌', 2985 => '𝌍', 2986 => '𝌎', 2987 => '𝌏', 2988 => '𝌐', 2989 => '𝌑', 2990 => '𝌒', 2991 => '𝌓', 2992 => '𝌔', 2993 => '𝌕', 2994 => '𝌖', 2995 => '𝌗', 2996 => '𝌘', 2997 => '𝌙', 2998 => '𝌚', 2999 => '𝌛', 3000 => '𝌜', 3001 => '𝌝', 3002 => '𝌞', 3003 => '𝌟', 3004 => '𝌠', 3005 => '𝌡', 3006 => '𝌢', 3007 => '𝌣', 3008 => '𝌤', 3009 => '𝌥', 3010 => '𝌦', 3011 => '𝌧', 3012 => '𝌨', 3013 => '𝌩', 3014 => '𝌪', 3015 => '𝌫', 3016 => '𝌬', 3017 => '𝌭', 3018 => '𝌮', 3019 => '𝌯', 3020 => '𝌰', 3021 => '𝌱', 3022 => '𝌲', 3023 => 
'𝌳', 3024 => '𝌴', 3025 => '𝌵', 3026 => '𝌶', 3027 => '𝌷', 3028 => '𝌸', 3029 => '𝌹', 3030 => '𝌺', 3031 => '𝌻', 3032 => '𝌼', 3033 => '𝌽', 3034 => '𝌾', 3035 => '𝌿', 3036 => '𝍀', 3037 => '𝍁', 3038 => '𝍂', 3039 => '𝍃', 3040 => '𝍄', 3041 => '𝍅', 3042 => '𝍆', 3043 => '𝍇', 3044 => '𝍈', 3045 => '𝍉', 3046 => '𝍊', 3047 => '𝍋', 3048 => '𝍌', 3049 => '𝍍', 305