MediaWiki master
Parser.php
Go to the documentation of this file.
1<?php
24namespace MediaWiki\Parser;
25
26use BadMethodCallException;
30use CoreTagHooks;
31use Exception;
32use File;
33use HtmlArmor;
36use InvalidArgumentException;
38use LogicException;
39use MapCacheLRU;
40use MediaHandler;
46use MediaWiki\Debug\DeprecationHelper;
91use PPFrame;
92use PPNode;
93use Preprocessor;
95use Psr\Log\LoggerInterface;
96use RuntimeException;
98use StringUtils;
99use StripState;
100use UnexpectedValueException;
101use Wikimedia\Bcp47Code\Bcp47CodeValue;
102use Wikimedia\IPUtils;
104use Wikimedia\Parsoid\Core\SectionMetadata;
105use Wikimedia\Parsoid\Core\TOCData;
106use Wikimedia\Parsoid\DOM\Comment;
107use Wikimedia\Parsoid\DOM\DocumentFragment;
108use Wikimedia\Parsoid\DOM\Element;
109use Wikimedia\Parsoid\DOM\Node;
110use Wikimedia\Parsoid\Utils\DOMCompat;
111use Wikimedia\Parsoid\Utils\DOMUtils;
112use Wikimedia\ScopedCallback;
113
154#[\AllowDynamicProperties]
155class Parser {
156 use DeprecationHelper;
157
158 # Flags for Parser::setFunctionHook
159 public const SFH_NO_HASH = 1;
160 public const SFH_OBJECT_ARGS = 2;
161
162 # Constants needed for external link processing
170 public const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
175 // phpcs:ignore Generic.Files.LineLength
176 private const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
178 // phpcs:ignore Generic.Files.LineLength
179 private const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
180 \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)avif|gif|jpg|jpeg|png|svg|webp)$/Sxu';
181
183 private const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
184
189 public const PTD_FOR_INCLUSION = Preprocessor::DOM_FOR_INCLUSION;
190
191 # Allowed values for $this->mOutputType
193 public const OT_HTML = 1;
195 public const OT_WIKI = 2;
197 public const OT_PREPROCESS = 3;
202 public const OT_PLAIN = 4;
203
221 public const MARKER_SUFFIX = "-QINU`\"'\x7f";
222 public const MARKER_PREFIX = "\x7f'\"`UNIQ-";
223
238 public const TOC_PLACEHOLDER = '<meta property="mw:PageProp/toc" />';
239
247 private const TOC_PLACEHOLDER_REGEX = '/<meta\\b[^>]*\\bproperty\\s*=\\s*"mw:PageProp\\/toc"[^>]*>/';
248
249 # Persistent:
251 private array $mTagHooks = [];
253 private array $mFunctionHooks = [];
255 private array $mFunctionSynonyms = [ 0 => [], 1 => [] ];
257 private array $mStripList = [];
259 private array $mVarCache = [];
261 private array $mImageParams = [];
263 private array $mImageParamsMagicArray = [];
265 public $mMarkerIndex = 0;
266
267 // Initialised by initializeVariables()
269 private MagicWordArray $mVariables;
270 private MagicWordArray $mSubstWords;
271
272 // Initialised in constructor
274 private string $mExtLinkBracketedRegex;
275 private UrlUtils $urlUtils;
276 private Preprocessor $mPreprocessor;
277
278 // Cleared with clearState():
280 private ParserOutput $mOutput;
281 private int $mAutonumber = 0;
282 private StripState $mStripState;
283 private LinkHolderArray $mLinkHolders;
284 private int $mLinkID = 0;
285 private array $mIncludeSizes;
296 private array $mTplRedirCache;
298 public array $mHeadings;
300 private array $mDoubleUnderscores;
306 private bool $mShowToc;
307 private bool $mForceTocPosition;
308 private array $mTplDomCache;
309 private ?UserIdentity $mUser;
310
311 # Temporary
312 # These are variables reset at least once per parse regardless of $clearState
313
318 private $mOptions;
319
320 # Deprecated "dynamic" properties
321 # These used to be dynamic properties added to the parser, but these
322 # have been deprecated since 1.42.
326 public $extCite;
343
349 private Title $mTitle;
351 private int $mOutputType;
356 private array $ot;
358 private ?int $mRevisionId = null;
360 private ?string $mRevisionTimestamp = null;
362 private ?string $mRevisionUser = null;
364 private ?int $mRevisionSize = null;
366 private $mInputSize = false;
367
368 private ?RevisionRecord $mRevisionRecordObject = null;
369
375 private ?MapCacheLRU $currentRevisionCache = null;
376
381 private $mInParse = false;
382
383 private SectionProfiler $mProfiler;
384 private ?LinkRenderer $mLinkRenderer = null;
385
386 private MagicWordFactory $magicWordFactory;
387 private Language $contLang;
388 private LanguageConverterFactory $languageConverterFactory;
389 private LanguageNameUtils $languageNameUtils;
390 private ParserFactory $factory;
391 private SpecialPageFactory $specialPageFactory;
392 private TitleFormatter $titleFormatter;
398 private ServiceOptions $svcOptions;
399 private LinkRendererFactory $linkRendererFactory;
400 private NamespaceInfo $nsInfo;
401 private LoggerInterface $logger;
402 private BadFileLookup $badFileLookup;
403 private HookContainer $hookContainer;
404 private HookRunner $hookRunner;
405 private TidyDriverBase $tidy;
406 private WANObjectCache $wanCache;
407 private UserOptionsLookup $userOptionsLookup;
408 private UserFactory $userFactory;
409 private HttpRequestFactory $httpRequestFactory;
410 private TrackingCategories $trackingCategories;
411 private SignatureValidatorFactory $signatureValidatorFactory;
412 private UserNameUtils $userNameUtils;
413
417 public const CONSTRUCTOR_OPTIONS = [
418 // See documentation for the corresponding config options
419 // Many of these are only used in (eg) CoreMagicVariables
443 ];
444
/**
 * Constructing parsers directly is not allowed! Use a ParserFactory.
 *
 * Stores the injected services, builds the bracketed external link regex
 * and the preprocessor, registers the core parser functions and core tag
 * hooks, initializes the magic variables and finally runs the
 * ParserFirstCallInit hook.
 *
 * @param ServiceOptions $svcOptions Must provide every key listed in
 *   self::CONSTRUCTOR_OPTIONS
 * @param MagicWordFactory $magicWordFactory
 * @param Language $contLang Content language
 * @param ParserFactory $factory
 * @param UrlUtils $urlUtils Supplies the valid protocols used in the
 *   bracketed external link regex
 * @param SpecialPageFactory $spFactory
 * @param LinkRendererFactory $linkRendererFactory
 * @param NamespaceInfo $nsInfo
 * @param LoggerInterface $logger
 * @param BadFileLookup $badFileLookup
 * @param LanguageConverterFactory $languageConverterFactory
 * @param LanguageNameUtils $languageNameUtils
 * @param HookContainer $hookContainer Also wrapped in a HookRunner
 * @param TidyDriverBase $tidy
 * @param WANObjectCache $wanCache Also handed to the preprocessor
 * @param UserOptionsLookup $userOptionsLookup
 * @param UserFactory $userFactory
 * @param TitleFormatter $titleFormatter
 * @param HttpRequestFactory $httpRequestFactory
 * @param TrackingCategories $trackingCategories
 * @param SignatureValidatorFactory $signatureValidatorFactory
 * @param UserNameUtils $userNameUtils
 * @throws BadMethodCallException If not constructed through a ParserFactory
 */
public function __construct(
	ServiceOptions $svcOptions,
	MagicWordFactory $magicWordFactory,
	Language $contLang,
	ParserFactory $factory,
	UrlUtils $urlUtils,
	SpecialPageFactory $spFactory,
	LinkRendererFactory $linkRendererFactory,
	NamespaceInfo $nsInfo,
	LoggerInterface $logger,
	BadFileLookup $badFileLookup,
	LanguageConverterFactory $languageConverterFactory,
	LanguageNameUtils $languageNameUtils,
	HookContainer $hookContainer,
	TidyDriverBase $tidy,
	WANObjectCache $wanCache,
	UserOptionsLookup $userOptionsLookup,
	UserFactory $userFactory,
	TitleFormatter $titleFormatter,
	HttpRequestFactory $httpRequestFactory,
	TrackingCategories $trackingCategories,
	SignatureValidatorFactory $signatureValidatorFactory,
	UserNameUtils $userNameUtils
) {
	$this->deprecateDynamicPropertiesAccess( '1.42', __CLASS__ );
	$this->deprecatePublicProperty( 'ot', '1.35', __CLASS__ );
	$this->deprecatePublicProperty( 'mTitle', '1.35', __CLASS__ );
	$this->deprecatePublicProperty( 'mOptions', '1.35', __CLASS__ );

	if ( ParserFactory::$inParserFactory === 0 ) {
		// Direct construction of Parser was deprecated in 1.34 and
		// removed in 1.36; use a ParserFactory instead.
		throw new BadMethodCallException( 'Direct construction of Parser not allowed' );
	}
	$svcOptions->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
	$this->svcOptions = $svcOptions;

	$this->urlUtils = $urlUtils;
	// Matches "[<protocol><address><url chars> <optional label>]"
	$this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->urlUtils->validProtocols() . ')' .
		self::EXT_LINK_ADDR .
		self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*)\]/Su';

	$this->magicWordFactory = $magicWordFactory;

	$this->contLang = $contLang;

	$this->factory = $factory;
	$this->specialPageFactory = $spFactory;
	$this->linkRendererFactory = $linkRendererFactory;
	$this->nsInfo = $nsInfo;
	$this->logger = $logger;
	$this->badFileLookup = $badFileLookup;

	$this->languageConverterFactory = $languageConverterFactory;
	$this->languageNameUtils = $languageNameUtils;

	$this->hookContainer = $hookContainer;
	$this->hookRunner = new HookRunner( $hookContainer );

	$this->tidy = $tidy;

	$this->wanCache = $wanCache;
	$this->mPreprocessor = new Preprocessor_Hash(
		$this,
		$this->wanCache,
		[
			'cacheThreshold' => $svcOptions->get( MainConfigNames::PreprocessorCacheThreshold ),
			'disableLangConversion' => $languageConverterFactory->isConversionDisabled(),
		]
	);

	$this->userOptionsLookup = $userOptionsLookup;
	$this->userFactory = $userFactory;
	$this->titleFormatter = $titleFormatter;
	$this->httpRequestFactory = $httpRequestFactory;
	$this->trackingCategories = $trackingCategories;
	$this->signatureValidatorFactory = $signatureValidatorFactory;
	$this->userNameUtils = $userNameUtils;

	// These steps used to be done in "::firstCallInit()"
	// (if you're chasing a reference from some old code)
	CoreParserFunctions::register(
		$this,
		new ServiceOptions( CoreParserFunctions::REGISTER_OPTIONS, $svcOptions )
	);
	CoreTagHooks::register(
		$this,
		new ServiceOptions( CoreTagHooks::REGISTER_OPTIONS, $svcOptions )
	);
	$this->initializeVariables();

	$this->hookRunner->onParserFirstCallInit( $this );
	// Placeholder title until a real page is set
	$this->mTitle = Title::makeTitle( NS_SPECIAL, 'Badtitle/Missing' );
}
566
/**
 * Drop every property held by this object on destruction, starting with
 * the link holder array if it has been initialized.
 */
public function __destruct() {
	// @phan-suppress-next-line PhanRedundantCondition Typed property not set in constructor, may be uninitialized
	$hasLinkHolders = isset( $this->mLinkHolders );
	if ( $hasLinkHolders ) {
		// @phan-suppress-next-line PhanTypeObjectUnsetDeclaredProperty
		unset( $this->mLinkHolders );
	}
	// @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach
	foreach ( $this as $property => $unused ) {
		unset( $this->$property );
	}
}
581
/**
 * Make the clone independent of the original: it is never "in parse", it
 * gets its own preprocessor bound to itself, and the ParserCloned hook is
 * run for it.
 */
public function __clone() {
	$this->mInParse = false;

	$ownPreprocessor = clone $this->mPreprocessor;
	$this->mPreprocessor = $ownPreprocessor;
	$ownPreprocessor->resetParser( $this );

	$this->hookRunner->onParserCloned( $this );
}
593
/**
 * Intentionally empty; kept only so that existing callers keep working.
 */
public function firstCallInit() {
	// No-op. Hard-deprecate this method once the remaining calls to it
	// have been removed.
}
607
/**
 * Reset the per-parse state of the parser: a fresh output object, link
 * holders, strip state and profiler, zeroed counters and emptied caches.
 * Runs the ParserClearState hook once everything has been reset.
 */
public function clearState() {
	$this->resetOutput();
	$this->mAutonumber = 0;
	$this->mLinkHolders = new LinkHolderArray(
		$this,
		$this->getContentLanguageConverter(),
		$this->getHookContainer()
	);
	$this->mLinkID = 0;

	// Forget everything known about the revision and user being parsed for
	$this->mRevisionTimestamp = null;
	$this->mRevisionId = null;
	$this->mRevisionUser = null;
	$this->mRevisionSize = null;
	$this->mRevisionRecordObject = null;
	$this->mVarCache = [];
	$this->mUser = null;
	$this->currentRevisionCache = null;

	$this->mStripState = new StripState( $this );

	# Clear these on every parse, T6549
	$this->mTplRedirCache = $this->mTplDomCache = [];

	$this->mShowToc = true;
	$this->mForceTocPosition = false;
	$this->mIncludeSizes = [ 'post-expand' => 0, 'arg' => 0 ];
	$this->mPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;
	$this->mHeadings = $this->mDoubleUnderscores = [];
	$this->mExpensiveFunctionCount = 0;

	$this->mProfiler = new SectionProfiler();

	$this->hookRunner->onParserClearState( $this );
}
653
/**
 * Replace the current output with a new, empty ParserOutput and register
 * its recordOption() method as a watcher on the current parser options.
 */
public function resetOutput() {
	$freshOutput = new ParserOutput;
	$this->mOutput = $freshOutput;
	$this->mOptions->registerWatcher( [ $freshOutput, 'recordOption' ] );
}
662
/**
 * Parse wikitext in HTML output mode and return the populated output object.
 *
 * @param string $text Wikitext to parse
 * @param PageReference $page Page used as context for the parse
 * @param ParserOptions $options
 * @param bool $linestart Forwarded to the block-level pass
 *   (presumably: whether $text starts at the beginning of a line)
 * @param bool $clearState Whether this is a fresh top-level parse: the
 *   parser is locked and U+007F is replaced in the input
 * @param int|null $revid Revision ID to parse as; when given, the cached
 *   revision metadata is dropped for the duration of the parse
 * @return ParserOutput
 */
public function parse(
	$text, PageReference $page, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	if ( $clearState ) {
		// We use U+007F DELETE to construct strip markers, so we have to make
		// sure that this character does not occur in the input text.
		$text = strtr( $text, "\x7f", "?" );
		// Held in a local so that it lives until this method returns
		$magicScopeVariable = $this->lock();
	}
	// Strip U+0000 NULL (T159174)
	$text = str_replace( "\000", '', $text );

	$this->startParse( $page, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	$this->mOutput->resetParseStartTime();

	// Remember the revision state so that it can be restored before returning
	$oldRevisionId = $this->mRevisionId;
	$oldRevisionRecordObject = $this->mRevisionRecordObject;
	$oldRevisionTimestamp = $this->mRevisionTimestamp;
	$oldRevisionUser = $this->mRevisionUser;
	$oldRevisionSize = $this->mRevisionSize;
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
		$this->mRevisionRecordObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	$text = $this->internalParse( $text );
	$this->hookRunner->onParserAfterParse( $this, $text, $this->mStripState );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	// Set the title text from the language converter, unless title
	// conversion is disabled (by option or double underscore) or a display
	// title has already been set.
	if ( !$options->getDisableTitleConversion()
		&& !isset( $this->mDoubleUnderscores['nocontentconvert'] )
		&& !isset( $this->mDoubleUnderscores['notitleconvert'] )
		&& $this->mOutput->getDisplayTitle() === false
	) {
		$titleText = $this->getTargetLanguageConverter()->getConvRuleTitle();
		if ( $titleText !== false ) {
			$titleText = Sanitizer::removeSomeTags( $titleText );
		} else {
			[ $nsText, $nsSeparator, $mainText ] = $this->getTargetLanguageConverter()->convertSplitTitle( $page );
			// In the future, those three pieces could be stored separately rather than joined into $titleText,
			// and OutputPage would format them and join them together, to resolve T314399.
			$titleText = self::formatPageTitle( $nsText, $nsSeparator, $mainText );
		}
		$this->mOutput->setTitleText( $titleText );
	}

	# Recording timing info. Must be called before finalizeAdaptiveCacheExpiry() and
	# makeLimitReport(), which make use of the timing info.
	$this->mOutput->recordTimeProfile();

	# Compute runtime adaptive expiry if set
	$this->mOutput->finalizeAdaptiveCacheExpiry();

	# Warn if too many heavyweight parser functions were used
	if ( $this->mExpensiveFunctionCount > $options->getExpensiveParserFunctionLimit() ) {
		$this->limitationWarn( 'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$options->getExpensiveParserFunctionLimit()
		);
	}

	# Information on limits, for the benefit of users who try to skirt them
	if ( $this->svcOptions->get( MainConfigNames::EnableParserLimitReporting ) ) {
		$this->makeLimitReport();
	}

	$this->mOutput->setFromParserOptions( $options );

	$this->mOutput->setRawText( $text );

	// Restore the revision state saved above
	$this->mRevisionId = $oldRevisionId;
	$this->mRevisionRecordObject = $oldRevisionRecordObject;
	$this->mRevisionTimestamp = $oldRevisionTimestamp;
	$this->mRevisionUser = $oldRevisionUser;
	$this->mRevisionSize = $oldRevisionSize;
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
776
/**
 * Record limit report data on the current output: timings, preprocessor
 * and include-size counters with their limits, strip state data, the ten
 * slowest profiled functions and cache metadata.
 */
protected function makeLimitReport() {
	$options = $this->mOptions;
	$output = $this->mOutput;
	$includeLimit = $options->getMaxIncludeSize();

	$cpu = $output->getTimeProfile( 'cpu' );
	if ( $cpu !== null ) {
		$output->setLimitReportData( 'limitreport-cputime', sprintf( "%.3f", $cpu ) );
	}

	$wall = $output->getTimeProfile( 'wall' );
	$output->setLimitReportData( 'limitreport-walltime', sprintf( "%.3f", $wall ) );

	// Each of these is stored as a [ used, limit ] pair
	$output->setLimitReportData(
		'limitreport-ppvisitednodes',
		[ $this->mPPNodeCount, $options->getMaxPPNodeCount() ]
	);
	$output->setLimitReportData(
		'limitreport-postexpandincludesize',
		[ $this->mIncludeSizes['post-expand'], $includeLimit ]
	);
	$output->setLimitReportData(
		'limitreport-templateargumentsize',
		[ $this->mIncludeSizes['arg'], $includeLimit ]
	);
	$output->setLimitReportData(
		'limitreport-expansiondepth',
		[ $this->mHighestExpansionDepth, $options->getMaxPPExpandDepth() ]
	);
	$output->setLimitReportData(
		'limitreport-expensivefunctioncount',
		[ $this->mExpensiveFunctionCount, $options->getExpensiveParserFunctionLimit() ]
	);

	foreach ( $this->mStripState->getLimitReport() as [ $reportKey, $reportValue ] ) {
		$output->setLimitReportData( $reportKey, $reportValue );
	}

	$this->hookRunner->onParserLimitReportPrepare( $this, $this->mOutput );

	// Add on template profiling data in human/machine readable way:
	// the ten entries with the largest 'real' value, largest first
	$stats = $this->mProfiler->getFunctionStats();
	uasort( $stats, static fn ( $a, $b ) => $b['real'] <=> $a['real'] );
	$timingLines = array_values( array_map(
		static function ( $entry ) {
			return sprintf(
				"%6.2f%% %8.3f %6d %s",
				$entry['%real'],
				$entry['real'],
				$entry['calls'],
				htmlspecialchars( $entry['name'] )
			);
		},
		array_slice( $stats, 0, 10 )
	) );

	$this->mOutput->setLimitReportData( 'limitreport-timingprofile', $timingLines );

	// Add other cache related metadata
	if ( $this->svcOptions->get( MainConfigNames::ShowHostnames ) ) {
		$this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
	}
	$this->mOutput->setLimitReportData(
		'cachereport-timestamp',
		$this->mOutput->getCacheTime()
	);
	$this->mOutput->setLimitReportData(
		'cachereport-ttl',
		$this->mOutput->getCacheExpiry()
	);
	$this->mOutput->setLimitReportData(
		'cachereport-transientcontent',
		$this->mOutput->hasReducedExpiry()
	);
}
842
/**
 * Run internalParse() on $text as a non-main parse.
 *
 * @param string $text
 * @param PPFrame|false $frame Frame to expand in, or false for none
 * @return string The result of internalParse()
 */
public function recursiveTagParse( $text, $frame = false ) {
	return $this->internalParse( $text, false, $frame );
}
872
/**
 * Like recursiveTagParse(), but additionally passes the result through
 * internalParseHalfParsed() as a non-main parse.
 *
 * @param string $text
 * @param PPFrame|false $frame Frame to expand in, or false for none
 * @return string
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );
	return $this->internalParseHalfParsed( $halfParsed, false );
}
897
/**
 * Parse $text the way a top-level document is parsed: recursiveTagParse(),
 * then the ParserAfterParse hook, then internalParseHalfParsed() as the
 * main parse.
 *
 * @param string $text
 * @return string
 */
public function parseExtensionTagAsTopLevelDoc( $text ) {
	$text = $this->recursiveTagParse( $text );
	// The hook receives $text and may modify it
	$this->hookRunner->onParserAfterParse( $this, $text, $this->mStripState );
	return $this->internalParseHalfParsed( $text, true );
}
923
/**
 * Start a fresh parse in preprocess output mode, replace variables in
 * $text and unstrip the result.
 *
 * @param string $text
 * @param PageReference|null $page
 * @param ParserOptions $options
 * @param int|null $revid Revision ID to use, if any
 * @param PPFrame|false $frame Frame passed on to replaceVariables()
 * @return string
 */
public function preprocess(
	$text,
	?PageReference $page,
	ParserOptions $options,
	$revid = null,
	$frame = false
) {
	// Kept in a local so that it lives until this method returns
	$parseLock = $this->lock();
	$this->startParse( $page, $options, self::OT_PREPROCESS, true );
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}
	$this->hookRunner->onParserBeforePreprocess( $this, $text, $this->mStripState );
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
953
/**
 * Replace variables in $text and unstrip the result, without starting a
 * new parse.
 *
 * @param string $text
 * @param PPFrame|false $frame Frame passed on to replaceVariables()
 * @return string
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
968
/**
 * Substitute $params into $text as a raw message, then expand the result
 * as an included document in plain output mode, with template arguments
 * and templates left unexpanded, and unstrip it.
 *
 * @param string $text
 * @param PageReference $page
 * @param ParserOptions $options
 * @param array $params Message parameters for $text
 * @return string
 */
public function getPreloadText( $text, PageReference $page, ParserOptions $options, $params = [] ) {
	$text = ( new RawMessage( $text ) )->params( $params )->plain();

	# Parser (re)initialisation
	$parseLock = $this->lock();
	$this->startParse( $page, $options, self::OT_PLAIN, true );

	$dom = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
	$frame = $this->getPreprocessor()->newFrame();
	$expanded = $frame->expand( $dom, PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES );
	return $this->mStripState->unstripBoth( $expanded );
}
997
/**
 * Set the user returned by getUserIdentity(); with null, the user from
 * the parser options is used instead.
 *
 * @param UserIdentity|null $user
 */
public function setUser( ?UserIdentity $user ) {
	$this->mUser = $user;
}
1008
/**
 * Set the context title; thin wrapper around setPage().
 *
 * The parameter is declared explicitly nullable: relying on a null
 * default to make the type nullable is deprecated in PHP 8.4.
 *
 * @param Title|null $t Forwarded to setPage() as-is
 */
public function setTitle( ?Title $t = null ) {
	$this->setPage( $t );
}
1019
/**
 * Get the title currently used as parsing context.
 *
 * @return Title
 */
public function getTitle(): Title {
	return $this->mTitle;
}
1028
/**
 * Set the page used as parsing context.
 *
 * @param PageReference|null $t Page to use; when omitted or null, the
 *   placeholder title Special:Badtitle/Parser is used
 */
public function setPage( ?PageReference $t = null ) {
	// For now (early 1.37 alpha), always convert to Title, so we don't have to do it over
	// and over again in other methods. Eventually, we will no longer need to have a Title
	// instance internally.
	$title = $t
		? Title::newFromPageReference( $t )
		: Title::makeTitle( NS_SPECIAL, 'Badtitle/Parser' );

	# Strip the fragment to avoid various odd effects
	$this->mTitle = $title->hasFragment()
		? $title->createFragmentTarget( '' )
		: $title;
}
1052
/**
 * Get the page used as parsing context.
 *
 * @return PageReference|null Null (with a deprecation warning) while the
 *   title is still the Badtitle/Missing placeholder set by the constructor
 */
public function getPage(): ?PageReference {
	if ( !$this->mTitle->isSpecial( 'Badtitle' ) ) {
		return $this->mTitle;
	}

	[ , $subPage ] = $this->specialPageFactory->resolveAlias( $this->mTitle->getDBkey() );
	if ( $subPage !== 'Missing' ) {
		return $this->mTitle;
	}

	wfDeprecated( __METHOD__ . ' without a Title set', '1.34' );
	return null;
}
1070
/**
 * Get the current output type.
 *
 * @return int One of the self::OT_* constants
 */
public function getOutputType(): int {
	return $this->mOutputType;
}
1079
/**
 * Set the output type and rebuild the $ot shortcut array, which maps
 * 'html', 'wiki', 'pre' and 'plain' to whether that type is selected.
 *
 * @param int $ot One of the self::OT_* constants
 */
public function setOutputType( $ot ): void {
	$this->mOutputType = $ot;

	# Shortcut alias
	$typeByAlias = [
		'html' => self::OT_HTML,
		'wiki' => self::OT_WIKI,
		'pre' => self::OT_PREPROCESS,
		'plain' => self::OT_PLAIN,
	];
	// array_map() keeps the string keys when given a single array
	$this->ot = array_map(
		static fn ( $candidate ) => $ot == $candidate,
		$typeByAlias
	);
}
1095
/**
 * Combined accessor/mutator for the output type, implemented with
 * wfSetVar().
 *
 * @deprecated since 1.35
 * @param int|null $x Presumably the new output type, or null to leave it
 *   unchanged - confirm against wfSetVar()
 * @return mixed Whatever wfSetVar() returns
 */
public function OutputType( $x = null ) {
	wfDeprecated( __METHOD__, '1.35' );
	return wfSetVar( $this->mOutputType, $x );
}
1107
/**
 * Get the current ParserOutput.
 *
 * @return ParserOutput Null is returned (with a deprecation warning) when
 *   called before the output has been initialized
 */
public function getOutput() {
	// @phan-suppress-next-line PhanRedundantCondition False positive, see https://github.com/phan/phan/issues/4720
	if ( isset( $this->mOutput ) ) {
		return $this->mOutput;
	}

	wfDeprecated( __METHOD__ . ' before initialization', '1.42' );
	// @phan-suppress-next-line PhanTypeMismatchReturnProbablyReal We don’t want to tell anyone we’re doing this
	return null;
}
1121
/**
 * Get the parser options currently in use.
 *
 * @return ParserOptions|null Presumably null until options have been set
 */
public function getOptions() {
	return $this->mOptions;
}
1129
/**
 * Set the parser options to use.
 *
 * @param ParserOptions $options
 */
public function setOptions( ParserOptions $options ): void {
	$this->mOptions = $options;
}
1138
/**
 * Combined accessor/mutator for the parser options, implemented with
 * wfSetVar().
 *
 * @deprecated since 1.35
 * @param ParserOptions|null $x Presumably the new options, or null to
 *   leave them unchanged - confirm against wfSetVar()
 * @return mixed Whatever wfSetVar() returns
 */
public function Options( $x = null ) {
	wfDeprecated( __METHOD__, '1.35' );
	return wfSetVar( $this->mOptions, $x );
}
1150
/**
 * Hand out the current link ID and advance the counter by one.
 *
 * @return int The link ID before the increment
 */
public function nextLinkID() {
	$current = $this->mLinkID;
	$this->mLinkID++;
	return $current;
}
1158
/**
 * Set the value that the next call to nextLinkID() will return.
 *
 * @param int $id
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
1166
/**
 * Deprecated alias of getTargetLanguage().
 *
 * @deprecated since 1.40
 * @return Language
 */
public function getFunctionLang() {
	wfDeprecated( __METHOD__, '1.40' );
	return $this->getTargetLanguage();
}
1177
/**
 * Get the target language for the content being parsed.
 *
 * In order of preference: the target language from the parser options,
 * the user language object from the options if this is an interface
 * message, otherwise the page language of the current title.
 *
 * @return Language
 */
public function getTargetLanguage() {
	$options = $this->mOptions;

	$explicitTarget = $options->getTargetLanguage();
	if ( $explicitTarget !== null ) {
		return $explicitTarget;
	}

	if ( $options->getInterfaceMessage() ) {
		return $options->getUserLangObj();
	}

	return $this->getTitle()->getPageLanguage();
}
1197
/**
 * Get the user for this parse: the one given to setUser() if any,
 * otherwise the user from the parser options.
 *
 * @return UserIdentity
 */
public function getUserIdentity(): UserIdentity {
	// isset() rather than a null comparison: like the ?? operator, it is
	// also safe while the typed property is still uninitialized.
	if ( isset( $this->mUser ) ) {
		return $this->mUser;
	}
	return $this->getOptions()->getUserIdentity();
}
1208
/**
 * Get the preprocessor instance owned by this parser.
 *
 * @return Preprocessor
 */
public function getPreprocessor() {
	return $this->mPreprocessor;
}
1218
/**
 * Get the LinkRenderer for this parser, creating it on first use.
 *
 * @return LinkRenderer
 */
public function getLinkRenderer() {
	// XXX The LinkRenderer is made with the options current at first use
	// and is then cached forever
	$this->mLinkRenderer ??= $this->linkRendererFactory->create();

	return $this->mLinkRenderer;
}
1233
/**
 * Get the MagicWordFactory given to the constructor.
 *
 * @return MagicWordFactory
 */
public function getMagicWordFactory() {
	return $this->magicWordFactory;
}
1243
/**
 * Get the content language given to the constructor.
 *
 * @return Language
 */
public function getContentLanguage() {
	return $this->contLang;
}
1253
/**
 * Get the BadFileLookup given to the constructor.
 *
 * @return BadFileLookup
 */
public function getBadFileLookup() {
	return $this->badFileLookup;
}
1263
/**
 * Replace every occurrence of the given tags, and every HTML comment, in
 * $text with a unique marker, and collect what was replaced in $matches.
 *
 * Each entry of $matches is keyed by its marker and holds
 * [ tag name, inner content, decoded attributes, full original text ].
 * The inner content is null for self-closing tags. A tag without an end
 * tag runs to the end of the text.
 *
 * @param string[] $elements Tag names; joined with "|" into a regex as-is
 * @param string $text Text to scan
 * @param array &$matches Output; overwritten with the collected entries
 * @return string $text with the matched spans replaced by markers
 */
public static function extractTagsAndParams( array $elements, $text, &$matches ) {
	// Shared across calls so that markers stay unique
	static $n = 1;
	$stripped = '';
	$matches = [];

	$taglist = implode( '|', $elements );
	$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";

	while ( $text != '' ) {
		$parts = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$stripped .= $parts[0];

		$partCount = count( $parts );
		if ( $partCount < 5 ) {
			// Nothing (more) to extract
			break;
		}

		if ( $partCount > 5 ) {
			# comment
			$element = $parts[4];
			$attributes = '';
			$close = '';
			$inside = $parts[5];
		} else {
			# tag
			[ , $element, $attributes, $close, $inside ] = $parts;
		}

		$marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
		$stripped .= $marker;

		if ( $close === '/>' ) {
			# Empty element tag, <tag />
			$content = null;
			$tail = null;
			$text = $inside;
		} else {
			$end = $element === '!--'
				? '/(-->)/'
				: "/(<\\/$element\\s*>)/i";
			$remainder = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $remainder[0];
			if ( count( $remainder ) >= 3 ) {
				[ , $tail, $text ] = $remainder;
			} else {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			}
		}

		$matches[$marker] = [
			$element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail"
		];
	}

	return $stripped;
}
1340
/**
 * Get the parser's strip list.
 *
 * @return array
 */
public function getStripList() {
	return $this->mStripList;
}
1349
/**
 * Get the StripState of the current parse.
 *
 * @return StripState
 */
public function getStripState() {
	return $this->mStripState;
}
1357
/**
 * Register $text with the strip state under a newly generated marker and
 * return that marker.
 *
 * @param string $text
 * @return string The marker standing in for $text
 */
public function insertStripItem( $text ) {
	$index = $this->mMarkerIndex++;
	$marker = self::MARKER_PREFIX . "-item-{$index}-" . self::MARKER_SUFFIX;
	$this->mStripState->addGeneral( $marker, $text );
	return $marker;
}
1373
/**
 * Convert wikitext table markup in $text to HTML table markup.
 *
 * The text is processed line by line. The state of each currently open
 * table is kept on a set of parallel stacks (one entry per open table),
 * so that nested tables work. Lines outside of any table are passed
 * through unchanged. Tables still open at the end of the text are closed.
 *
 * @param string $text
 * @return string
 */
1380 private function handleTables( $text ) {
1381 $lines = StringUtils::explode( "\n", $text );
1382 $out = '';
1383 $td_history = []; # Is currently a td tag open?
1384 $last_tag_history = []; # Save history of last tag activated (td, th or caption)
1385 $tr_history = []; # Is currently a tr tag open?
1386 $tr_attributes = []; # history of tr attributes
1387 $has_opened_tr = []; # Did this table open a <tr> element?
1388 $indent_level = 0; # indent level of the table
1389
1390 foreach ( $lines as $outLine ) {
1391 $line = trim( $outLine );
1392
1393 if ( $line === '' ) { # empty line, go to next line
1394 $out .= $outLine . "\n";
1395 continue;
1396 }
1397
1398 $first_character = $line[0];
1399 $first_two = substr( $line, 0, 2 );
1400 $matches = [];
1401
1402 if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
1403 # First check if we are starting a new table
 # The number of leading colons gives the indent level
1404 $indent_level = strlen( $matches[1] );
1405
1406 $attributes = $this->mStripState->unstripBoth( $matches[2] );
1407 $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );
1408
1409 $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
 # Push a fresh state entry for the new table onto every stack
1410 $td_history[] = false;
1411 $last_tag_history[] = '';
1412 $tr_history[] = false;
1413 $tr_attributes[] = '';
1414 $has_opened_tr[] = false;
1415 } elseif ( count( $td_history ) == 0 ) {
1416 # Don't do any of the following
1417 $out .= $outLine . "\n";
1418 continue;
1419 } elseif ( $first_two === '|}' ) {
1420 # We are ending a table
1421 $line = '</table>' . substr( $line, 2 );
1422 $last_tag = array_pop( $last_tag_history );
1423
 # A table that never opened a row gets a single empty row
1424 if ( !array_pop( $has_opened_tr ) ) {
1425 $line = "<tr><td></td></tr>{$line}";
1426 }
1427
1428 if ( array_pop( $tr_history ) ) {
1429 $line = "</tr>{$line}";
1430 }
1431
1432 if ( array_pop( $td_history ) ) {
1433 $line = "</{$last_tag}>{$line}";
1434 }
1435 array_pop( $tr_attributes );
1436 if ( $indent_level > 0 ) {
1437 $outLine = rtrim( $line ) . str_repeat( '</dd></dl>', $indent_level );
1438 } else {
1439 $outLine = $line;
1440 }
1441 } elseif ( $first_two === '|-' ) {
1442 # Now we have a table row
1443 $line = preg_replace( '#^\|-+#', '', $line );
1444
1445 # Whats after the tag is now only attributes
1446 $attributes = $this->mStripState->unstripBoth( $line );
1447 $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
 # The attributes are kept until the first cell of the row emits the <tr>
1448 array_pop( $tr_attributes );
1449 $tr_attributes[] = $attributes;
1450
1451 $line = '';
1452 $last_tag = array_pop( $last_tag_history );
1453 array_pop( $has_opened_tr );
1454 $has_opened_tr[] = true;
1455
1456 if ( array_pop( $tr_history ) ) {
1457 $line = '</tr>';
1458 }
1459
1460 if ( array_pop( $td_history ) ) {
1461 $line = "</{$last_tag}>{$line}";
1462 }
1463
1464 $outLine = $line;
1465 $tr_history[] = false;
1466 $td_history[] = false;
1467 $last_tag_history[] = '';
1468 } elseif ( $first_character === '|'
1469 || $first_character === '!'
1470 || $first_two === '|+'
1471 ) {
1472 # This might be cell elements, td, th or captions
1473 if ( $first_two === '|+' ) {
1474 $first_character = '+';
1475 $line = substr( $line, 2 );
1476 } else {
1477 $line = substr( $line, 1 );
1478 }
1479
1480 // Implies both are valid for table headings.
1481 if ( $first_character === '!' ) {
1482 $line = StringUtils::replaceMarkup( '!!', '||', $line );
1483 }
1484
1485 # Split up multiple cells on the same line.
1486 # FIXME : This can result in improper nesting of tags processed
1487 # by earlier parser steps.
1488 $cells = explode( '||', $line );
1489
1490 $outLine = '';
1491
1492 # Loop through each table cell
1493 foreach ( $cells as $cell ) {
1494 $previous = '';
 # Captions do not open a row; td and th cells do, if none is open yet
1495 if ( $first_character !== '+' ) {
1496 $tr_after = array_pop( $tr_attributes );
1497 if ( !array_pop( $tr_history ) ) {
1498 $previous = "<tr{$tr_after}>\n";
1499 }
1500 $tr_history[] = true;
1501 $tr_attributes[] = '';
1502 array_pop( $has_opened_tr );
1503 $has_opened_tr[] = true;
1504 }
1505
1506 $last_tag = array_pop( $last_tag_history );
1507
 # Close the previous cell of this table, if one is still open
1508 if ( array_pop( $td_history ) ) {
1509 $previous = "</{$last_tag}>\n{$previous}";
1510 }
1511
1512 if ( $first_character === '|' ) {
1513 $last_tag = 'td';
1514 } elseif ( $first_character === '!' ) {
1515 $last_tag = 'th';
1516 } elseif ( $first_character === '+' ) {
1517 $last_tag = 'caption';
1518 } else {
1519 $last_tag = '';
1520 }
1521
1522 $last_tag_history[] = $last_tag;
1523
1524 # A cell could contain both parameters and data
1525 $cell_data = explode( '|', $cell, 2 );
1526
1527 # T2553: Note that a '|' inside an invalid link should not
1528 # be mistaken as delimiting cell parameters
1529 # Bug T153140: Neither should language converter markup.
1530 if ( preg_match( '/\[\[|-\{/', $cell_data[0] ) === 1 ) {
1531 $cell = "{$previous}<{$last_tag}>" . trim( $cell );
1532 } elseif ( count( $cell_data ) == 1 ) {
1533 // Whitespace in cells is trimmed
1534 $cell = "{$previous}<{$last_tag}>" . trim( $cell_data[0] );
1535 } else {
1536 $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
1537 $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
1538 // Whitespace in cells is trimmed
1539 $cell = "{$previous}<{$last_tag}{$attributes}>" . trim( $cell_data[1] );
1540 }
1541
1542 $outLine .= $cell;
1543 $td_history[] = true;
1544 }
1545 }
1546 $out .= $outLine . "\n";
1547 }
1548
1549 # Closing open td, tr && table
 # Note that an open cell is always closed with </td> here, whatever
 # the last tag was
1550 while ( count( $td_history ) > 0 ) {
1551 if ( array_pop( $td_history ) ) {
1552 $out .= "</td>\n";
1553 }
1554 if ( array_pop( $tr_history ) ) {
1555 $out .= "</tr>\n";
1556 }
1557 if ( !array_pop( $has_opened_tr ) ) {
1558 $out .= "<tr><td></td></tr>\n";
1559 }
1560
1561 $out .= "</table>\n";
1562 }
1563
1564 # Remove trailing line-ending (b/c)
1565 if ( substr( $out, -1 ) === "\n" ) {
1566 $out = substr( $out, 0, -1 );
1567 }
1568
1569 # special case: don't return empty table
1570 if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
1571 $out = '';
1572 }
1573
1574 return $out;
1575 }
1576
/**
 * Run the first stage of parsing: expand variables/templates, remove
 * disallowed HTML, then handle tables, horizontal rules, double
 * underscores, headings, internal links, quotes, external links and
 * magic links, in that order.
 *
 * @param string $text
 * @param bool $isMain Forwarded to finalizeHeadings()
 * @param PPFrame|false $frame Frame to expand in; when false,
 *   replaceVariables() is used instead
 * @return string The half-parsed text, or the text as left by the
 *   ParserBeforeInternalParse hook if that hook aborts
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
	$origText = $text;

	# Hook to suspend the parser in this state
	$proceed = $this->hookRunner->onParserBeforeInternalParse( $this, $text, $this->mStripState );
	if ( !$proceed ) {
		return $text;
	}

	if ( !$frame ) {
		# if $frame is not provided, then use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	} else {
		# if $frame is provided, then use $frame for replacing any variables.
		# Use frame depth to infer how include/noinclude tags should be handled:
		# depth=0 means this is the top-level document; otherwise it's an included document
		$flag = $frame->depth ? Preprocessor::DOM_FOR_INCLUSION : 0;
		$dom = $this->preprocessToDom( $text, $flag );
		$text = $frame->expand( $dom );
	}

	// Callback from the Sanitizer for expanding items found in
	// HTML attribute values, so they can be safely tested and escaped.
	$expandAttributeValue = function ( &$text, $frame = false ) {
		$text = $this->replaceVariables( $text, $frame );
		$text = $this->mStripState->unstripBoth( $text );
	};
	$text = Sanitizer::internalRemoveHtmlTags( $text, $expandAttributeValue, false, [], [] );
	$this->hookRunner->onInternalParseBeforeLinks( $this, $text, $this->mStripState );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->handleTables( $text );

	// Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->handleDoubleUnderscore( $text );

	$text = $this->handleHeadings( $text );
	$text = $this->handleInternalLinks( $text );
	$text = $this->handleAllQuotes( $text );
	$text = $this->handleExternalLinks( $text );

	# handleInternalLinks may sometimes leave behind
	# absolute URLs, which have to be masked to hide them from handleExternalLinks
	$text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );

	$text = $this->handleMagicLinks( $text );

	return $this->finalizeHeadings( $text, $origText, $isMain );
}
1652
1660 return $this->languageConverterFactory->getLanguageConverter(
1661 $this->getTargetLanguage()
1662 );
1663 }
1664
/**
 * Look up the language converter for the parser's content language.
 *
 * @return ILanguageConverter Converter obtained from the language
 *   converter factory for getContentLanguage()
 */
private function getContentLanguageConverter(): ILanguageConverter {
	$factory = $this->languageConverterFactory;
	return $factory->getLanguageConverter( $this->getContentLanguage() );
}
1675
/**
 * Accessor for the hook container this parser holds.
 *
 * @return mixed The value of $this->hookContainer
 */
protected function getHookContainer() {
	return $this->hookContainer;
}
1686
/**
 * Accessor for the hook runner this parser uses to fire its hooks.
 *
 * @return mixed The value of $this->hookRunner
 */
protected function getHookRunner() {
	return $this->hookRunner;
}
1698
/**
 * Finish off text that has already been through the link and markup
 * passes: unstrip, run block-level formatting, replace link holders,
 * apply language conversion (unless suppressed), localise the TOC,
 * record the output language, unstrip again and tidy.
 *
 * @param string $text Half-parsed text
 * @param bool $isMain When true, onParserAfterTidy() is run on the result
 * @param bool $linestart Passed through to BlockLevelPass::doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	$text = $this->mStripState->unstripGeneral( $text );
	$text = BlockLevelPass::doBlockLevels( $text, $linestart );
	$this->replaceLinkHoldersPrivate( $text );

	// Conversion is skipped when disabled by option, by the
	// 'nocontentconvert' double underscore, or for interface messages.
	$suppressConversion = $this->mOptions->getDisableContentConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| $this->mOptions->getInterfaceMessage();

	$converter = null;
	if ( !$suppressConversion ) {
		# The position of the convert() call must not change: it assumes
		# that all links are replaced already and that the only thing
		# left is the <nowiki> mark.
		$converter = $this->getTargetLanguageConverter();
		$text = $converter->convert( $text );
	}

	// The TOC is converted *after* the main text so that all the
	// editor-defined conversion rules (by convention defined at the
	// start of the article) are applied to it as well.
	self::localizeTOC(
		$this->mOutput->getTOCData(),
		$this->getTargetLanguage(),
		$converter // null when conversion is suppressed
	);

	$outputLanguage = $converter
		? new Bcp47CodeValue( LanguageCode::bcp47( $converter->getPreferredVariant() ) )
		: $this->getTargetLanguage();
	$this->mOutput->setLanguage( $outputLanguage );

	$text = $this->mStripState->unstripNoWiki( $text );
	$text = $this->mStripState->unstripGeneral( $text );
	$text = $this->tidy->tidy( $text, [ Sanitizer::class, 'armorFrenchSpaces' ] );

	if ( $isMain ) {
		$this->hookRunner->onParserAfterTidy( $this, $text );
	}

	return $text;
}
1762
/**
 * Find free external links and RFC / PMID / ISBN references in the text
 * and hand each match to magicLinkCallback() for replacement.
 *
 * Existing <a>...</a> elements and anything inside an HTML tag are
 * matched too, so that the callback can pass them through unchanged.
 *
 * @param string $text
 * @return string|null Result of preg_replace_callback()
 */
private function handleMagicLinks( $text ) {
	$prots = $this->urlUtils->validAbsoluteProtocols();
	$urlChar = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	# non-newline space
	$space = self::SPACE_NOT_NL;
	# a dash or a non-newline space
	$spdash = "(?:-|$space)";
	# possessive match of 1 or more spaces
	$spaces = "$space++";

	// Extended (x) syntax: whitespace is ignored and "#" starts a comment,
	// so the line breaks inside the pattern are significant.
	$pattern = '!(?:                            # Start cases
		(<a[ \t\r\n>].*?</a>) |      # m[1]: Skip link text
		(<.*?>) |                    # m[2]: Skip stuff inside HTML elements' . "
		(\b                          # m[3]: Free external links
			(?i:$prots)
			($addr$urlChar*)         # m[4]: Post-protocol path
		) |
		\b(?:RFC|PMID) $spaces       # m[5]: RFC or PMID, capture number
			([0-9]+)\b |
		\bISBN $spaces (             # m[6]: ISBN, capture number
			(?: 97[89] $spdash? )?   #  optional 13-digit ISBN prefix
			(?: [0-9]  $spdash? ){9} #  9 digits with opt. delimiters
			[0-9Xx]                  #  check digit
		)\b
	)!xu";

	return preg_replace_callback( $pattern, [ $this, 'magicLinkCallback' ], $text );
}
1801
/**
 * Replacement callback for the pattern built in handleMagicLinks().
 *
 * @param array $m Match groups: [1] existing anchor, [2] HTML tag,
 *   [3] free external link with [4] its post-protocol part,
 *   [5] RFC/PMID number, [6] ISBN number
 * @return string Replacement for the whole match $m[0]
 * @throws UnexpectedValueException If group 5 matched but the match
 *   starts with neither "RFC" nor "PMID"
 */
private function magicLinkCallback( array $m ) {
	# Existing anchors and HTML elements are passed through untouched
	if ( ( $m[1] ?? '' ) !== '' || ( $m[2] ?? '' ) !== '' ) {
		return $m[0];
	}

	# Free external link
	if ( ( $m[3] ?? '' ) !== '' ) {
		return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
	}

	# RFC or PMID
	if ( ( $m[5] ?? '' ) !== '' ) {
		$id = $m[5];
		if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
			if ( !$this->mOptions->getMagicRFCLinks() ) {
				return $m[0];
			}
			[ $keyword, $urlmsg, $cssClass, $trackingCat ] =
				[ 'RFC', 'rfcurl', 'mw-magiclink-rfc', 'magiclink-tracking-rfc' ];
		} elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
			if ( !$this->mOptions->getMagicPMIDLinks() ) {
				return $m[0];
			}
			[ $keyword, $urlmsg, $cssClass, $trackingCat ] =
				[ 'PMID', 'pubmedurl', 'mw-magiclink-pmid', 'magiclink-tracking-pmid' ];
		} else {
			// Should never happen
			throw new UnexpectedValueException( __METHOD__ . ': unrecognised match type "' .
				substr( $m[0], 0, 20 ) . '"' );
		}

		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		$this->addTrackingCategory( $trackingCat );
		return $this->getLinkRenderer()->makeExternalLink(
			$url,
			"{$keyword} {$id}",
			$this->getTitle(),
			$cssClass,
			[]
		);
	}

	# ISBN (only when the option is enabled)
	if ( ( $m[6] ?? '' ) !== '' && $this->mOptions->getMagicISBNLinks() ) {
		# Display form: every non-newline space variant becomes a plain space
		$isbn = preg_replace( '/' . self::SPACE_NOT_NL . '/', ' ', $m[6] );
		# Lookup form: no separators, upper-case check digit
		$num = strtr( $isbn, [
			'-' => '',
			' ' => '',
			'x' => 'X',
		] );
		$this->addTrackingCategory( 'magiclink-tracking-isbn' );
		return $this->getLinkRenderer()->makeKnownLink(
			SpecialPage::getTitleFor( 'Booksources', $num ),
			"ISBN $isbn",
			[
				'class' => 'internal mw-magiclink-isbn',
				'title' => false // suppress title attribute
			]
		);
	}

	return $m[0];
}
1875
/**
 * Turn a free (unbracketed) URL found in the text into a link or an
 * external image, after splitting off anything that should not be
 * part of the URL.
 *
 * @param string $url The matched URL, protocol included
 * @param int $numPostProto Length of the part after the protocol; if
 *   the split-off trail is at least this long, nothing but the protocol
 *   is left and the text is returned unlinked
 * @return string Link or image HTML followed by the trail, or the
 *   original text when no real URL remains
 */
private function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# '<' and '>' (which were escaped by internalRemoveHtmlTags()) should
	# not be included in URLs, per RFC 2396.
	# &nbsp; terminates a URL as well (bug T84937)
	$entity = [];
	$terminated = preg_match(
		'/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
		$url,
		$entity,
		PREG_OFFSET_CAPTURE
	);
	if ( $terminated ) {
		$cut = $entity[0][1];
		$trail = substr( $url, $cut ) . $trail;
		$url = substr( $url, 0, $cut );
	}

	# Trailing punctuation moves to $trail. Without a left bracket in the
	# URL, right brackets are considered fair game too.
	$sep = ',;\.:!?' . ( strpos( $url, '(' ) === false ? ')' : '' );

	$reversed = strrev( $url );
	$trailLen = strspn( $reversed, $sep );
	# Don't break a trailing HTML entity by moving its ';' into $trail.
	# This is hot code: substr_compare avoids creating a new string for
	# the comparison, and matching the reversed string from a fixed offset
	# avoids a slow $-anchored preg_match.
	# The un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
	if ( $trailLen
		&& substr_compare( $url, ";", -$trailLen, 1 ) === 0
		&& preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $reversed, $entity, 0, $trailLen )
	) {
		$trailLen--;
	}
	if ( $trailLen ) {
		$trail = substr( $url, -$trailLen ) . $trail;
		$url = substr( $url, 0, -$trailLen );
	}

	# After trail removal there must still be a real URL left, not just a
	# lone protocol
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# An external image takes precedence over a plain link
	$text = $this->maybeMakeExternalImage( $url );
	if ( $text !== false ) {
		return $text . $trail;
	}

	# Not an image, make a link
	$text = $this->getLinkRenderer()->makeExternalLink(
		$url,
		$this->getTargetLanguageConverter()->markNoConversion( $url ),
		$this->getTitle(),
		'free',
		$this->getExternalLinkAttribs( $url )
	);
	# Register it in the output object...
	$this->mOutput->addExternalLink( $url );

	return $text . $trail;
}
1953
/**
 * Convert "== Heading ==" style lines into <h1>..<h6> elements.
 *
 * Levels are processed from 6 down to 1 so that longer runs of "="
 * are consumed before shorter ones can match them.
 *
 * @param string $text
 * @return string|null Result of the last preg_replace()
 */
private function handleHeadings( $text ) {
	foreach ( [ 6, 5, 4, 3, 2, 1 ] as $level ) {
		$marker = str_repeat( '=', $level );
		// Only non-newline whitespace is trimmed around the heading text:
		// using \s* there would break "==\n===\n", parsing it as <h2>=</h2>
		$pattern = '/^(?:' . $marker . ')[ \\t]*(.+?)[ \\t]*(?:' . $marker . ')\\s*$/m';
		$replacement = '<h' . $level . '>\\1</h' . $level . '>';
		$text = preg_replace( $pattern, $replacement, $text );
	}
	return $text;
}
1969
/**
 * Apply doQuotes() to every line of the text.
 *
 * Quote markup is resolved line by line, so the text is split on
 * newlines, each line is converted separately, and the lines are
 * joined back together with newlines.
 *
 * @param string $text
 * @return string
 */
private function handleAllQuotes( $text ) {
	$converted = [];
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes( $line );
	}
	return implode( "\n", $converted );
}
1986
/**
 * Convert the apostrophe markup of a single line into <i> and <b> tags.
 *
 * Two apostrophes toggle italics, three toggle bold and five toggle
 * both. Runs of four, or of more than five, keep their surplus leading
 * apostrophes as literal text. When the line holds an odd number of
 * both bold and italic markers, one bold marker is reinterpreted as a
 * literal apostrophe followed by an italic marker. Tags still open at
 * the end of the line are closed.
 *
 * @param string $text A single line of text (see handleAllQuotes())
 * @return string The line with quote markup replaced by HTML tags
 */
public function doQuotes( $text ) {
	// Odd indexes hold the apostrophe runs, even indexes the text between
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr === 1 ) {
		// No markup on this line
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (T15227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen === 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen === 2 ) {
			$numitalics++;
		} elseif ( $thislen === 3 ) {
			$numbold++;
		} elseif ( $thislen === 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 === 1 ) && ( $numitalics % 2 === 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) === 3 ) {
				// Last and second-to-last character of the preceding text
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace === -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} elseif ( $firstmultiletterword === -1 ) {
					$firstmultiletterword = $i;
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that has neither.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state names the currently open tags in opening order ('', 'i', 'b',
	// 'ib', 'bi'), or is 'both' after an opening ''''' whose tag order is
	// not known yet; text seen in that state is held back in $buffer.
	$output = '';
	$buffer = '';
	$state = '';
	$i = 0;
	foreach ( $arr as $r ) {
		if ( ( $i % 2 ) === 0 ) {
			// Plain text between two markers
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			$thislen = strlen( $r );
			if ( $thislen === 2 ) {
				// two quotes - open or close italics
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen === 3 ) {
				// three quotes - open or close bold
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen === 5 ) {
				// five quotes - open or close both separately
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		$i++;
	}
	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: a plain truthiness check would treat
	// the buffered text "0" as empty and silently drop it.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
2170
/**
 * Replace bracketed external links ([http://example.com label]) with
 * anchor HTML and register every linked URL in the parser output.
 *
 * @param string $text
 * @return string
 * @throws RuntimeException If splitting the text with the bracketed
 *   link regex fails
 */
private function handleExternalLinks( $text ) {
	$bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	// @phan-suppress-next-line PhanTypeComparisonFromArray See phan issue #3161
	if ( $bits === false ) {
		throw new RuntimeException( "PCRE failure" );
	}
	// Everything before the first link
	$s = array_shift( $bits );

	// The remaining pieces are consumed four at a time:
	// URL, protocol (unused here), link text, text following the link
	for ( $i = 0; $i < count( $bits ); $i += 4 ) {
		$url = $bits[$i];
		$label = $bits[$i + 2];
		$trail = $bits[$i + 3];

		# '<' and '>' (which were escaped by internalRemoveHtmlTags())
		# should not be included in URLs, per RFC 2396: whatever follows
		# the first one is moved in front of the link text.
		$m2 = [];
		if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) {
			$cut = $m2[0][1];
			$label = substr( $url, $cut ) . ' ' . $label;
			$url = substr( $url, 0, $cut );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeExternalImage( $label );
		if ( $img !== false ) {
			$label = $img;
		}

		$dtrail = '';
		# Link type, used for CSS
		$linktype = 'text';

		if ( $label != '' ) {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			[ $dtrail, $trail ] = Linker::splitTrail( $trail );
		} else {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber
			$label = '[' . $this->getTargetLanguage()->formatNum( ++$this->mAutonumber ) . ']';
			$linktype = 'autonumber';
		}

		// Excluding protocol-relative URLs may avoid many false positives.
		if ( preg_match( '/^(?:' . $this->urlUtils->validAbsoluteProtocols() . ')/', $label ) ) {
			$label = $this->getTargetLanguageConverter()->markNoConversion( $label );
		}

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$s .= $this->getLinkRenderer()->makeExternalLink(
			$url,
			// @phan-suppress-next-line SecurityCheck-XSS
			new HtmlArmor( $label ),
			$this->getTitle(),
			$linktype,
			$this->getExternalLinkAttribs( $url )
		) . $dtrail . $trail;

		# Register link in the output object.
		$this->mOutput->addExternalLink( $url );
	}

	// @phan-suppress-next-line PhanTypeMismatchReturnNullable False positive from array_shift
	return $s;
}
2255
/**
 * Get the rel attribute value for an external link, based on the
 * NoFollowLinks, NoFollowNsExceptions and NoFollowDomainExceptions
 * settings.
 *
 * @param string|false $url URL to test against the domain exceptions
 * @param LinkTarget|null $title Page the link appears on, used for the
 *   namespace exceptions; without it no namespace exception applies
 * @return string|null 'nofollow', or null when it must not be added
 */
public static function getExternalLinkRel( $url = false, ?LinkTarget $title = null ) {
	$mainConfig = MediaWikiServices::getInstance()->getMainConfig();
	$noFollowLinks = $mainConfig->get( MainConfigNames::NoFollowLinks );
	$noFollowNsExceptions = $mainConfig->get( MainConfigNames::NoFollowNsExceptions );
	$noFollowDomainExceptions = $mainConfig->get( MainConfigNames::NoFollowDomainExceptions );

	if ( !$noFollowLinks ) {
		return null;
	}

	// Only consult the namespace exceptions when there is a title. The
	// former "false" placeholder namespace compared loosely equal to
	// namespace 0, so a missing title was treated as the main namespace.
	if ( $title !== null && in_array( $title->getNamespace(), $noFollowNsExceptions ) ) {
		return null;
	}

	if ( wfGetUrlUtils()->matchesDomainList( (string)$url, $noFollowDomainExceptions ) ) {
		return null;
	}

	return 'nofollow';
}
2280
/**
 * Build the extra HTML attributes (target, rel) for an external link
 * to the given URL.
 *
 * @param string $url
 * @return array Attribute name => value; contains 'target' when the
 *   parser options define a link target and 'rel' when there is at
 *   least one rel token
 */
public function getExternalLinkAttribs( $url ) {
	$rel = self::getExternalLinkRel( $url, $this->getTitle() ) ?? '';
	$attribs = [];

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
		$staysInBrowsingContext = in_array( $target, [ '_self', '_parent', '_top' ] );
		if ( !$staysInBrowsingContext ) {
			// T133507. New windows can navigate parent cross-origin.
			// noreferrer is included because browser support for noopener
			// is lacking. Eventually noreferrer should be removed.
			$rel = ( $rel === '' ? '' : $rel . ' ' ) . 'noreferrer noopener';
		}
	}

	if ( $rel !== '' ) {
		$attribs['rel'] = $rel;
	}
	return $attribs;
}
2314
/**
 * Normalise the percent-encoding of a URL.
 *
 * Unsafe raw characters are encoded first. Then the fragment, query,
 * path and scheme/host parts are each passed through
 * normalizeUrlComponent() with their own set of characters that must
 * stay escaped; dot segments are removed from the path. A bracketed
 * IPv6 host recognised in the input gets its brackets restored.
 *
 * @param string $url
 * @return string|null
 */
public static function normalizeLinkUrl( $url ) {
	# Test for RFC 3986 IPv6 syntax
	$scheme = '[a-z][a-z0-9+.-]*:';
	$userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
	$ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';
	$isIPv6 = false;
	if ( preg_match( "<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $m ) &&
		IPUtils::isValid( rawurldecode( $m[1] ) )
	) {
		$isIPv6 = rawurldecode( $m[1] );
	}

	# Make sure unsafe characters are encoded
	$url = preg_replace_callback(
		'/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]+/',
		static fn ( $m ) => rawurlencode( $m[0] ),
		$url
	);

	# The result is assembled back to front; $end marks the start of the
	# part of $url that has been handled already.
	$ret = '';
	$end = strlen( $url );

	# Fragment part - 'fragment'
	# Query part - 'query' minus &=+;
	$trailingParts = [
		'#' => '"#%<>[\]^`{|}',
		'?' => '"#%<>[\]^`{|}&=+;',
	];
	foreach ( $trailingParts as $delimiter => $unsafe ) {
		$start = strpos( $url, $delimiter );
		if ( $start !== false && $start < $end ) {
			$ret = self::normalizeUrlComponent(
				substr( $url, $start, $end - $start ), $unsafe ) . $ret;
			$end = $start;
		}
	}

	# Path part - 'pchar', remove dot segments
	# (find first '/' after the optional '//' after the scheme)
	$doubleSlash = strpos( $url, '//' );
	$start = strpos( $url, '/', $doubleSlash === false ? 0 : $doubleSlash + 2 );
	if ( $start !== false && $start < $end ) {
		$path = self::normalizeUrlComponent(
			substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}/?' );
		$ret = UrlUtils::removeDotSegments( $path ) . $ret;
		$end = $start;
	}

	# Scheme and host part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$ret = self::normalizeUrlComponent(
		substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;

	# Fix IPv6 syntax: the brackets were percent-encoded above
	if ( $isIPv6 !== false ) {
		$ipv6Host = "%5B({$isIPv6})%5D";
		$ret = preg_replace(
			"<^((?:{$scheme})?//(?:{$userinfo}@)?){$ipv6Host}(?=[:/?#]|$)>i",
			"$1[$2]",
			$ret
		);
	}

	return $ret;
}
2391
/**
 * Normalise the percent-escapes within one component of a URL.
 *
 * An escape is replaced by its character when that character is
 * printable ASCII (code 33 to 126) and not listed in $unsafe; every
 * other escape is kept, with its hex digits upper-cased.
 *
 * @param string $component
 * @param string $unsafe Characters that must stay percent-encoded
 * @return string|null Result of preg_replace_callback()
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		static function ( $escape ) use ( $unsafe ) {
			$decoded = urldecode( $escape[0] );
			$code = ord( $decoded );
			$mustStayEscaped = $code <= 32
				|| $code >= 127
				|| strpos( $unsafe, $decoded ) !== false;
			# Either leave it escaped (upper-case a-f) or unescape it
			return $mustStayEscaped ? strtoupper( $escape[0] ) : $decoded;
		},
		$component
	);
}
2406
/**
 * Make an external image for the URL if the parser options allow it.
 *
 * An image is made when the URL matches EXT_IMAGE_REGEX and either
 * external images are allowed in general, or the URL starts with one
 * of the allowed source prefixes. Failing that, when the image
 * whitelist is enabled, the URL is tested against each regex line of
 * the 'external_image_whitelist' message.
 *
 * @param string $url
 * @return string|false Image HTML, or false if no image was made
 */
private function maybeMakeExternalImage( $url ) {
	$imagesfrom = $this->mOptions->getAllowExternalImagesFrom();
	$text = false;

	# $imagesfrom can be a single prefix or an array of prefixes
	$prefixMatched = false;
	if ( $imagesfrom ) {
		$prefixes = is_array( $imagesfrom ) ? $imagesfrom : [ $imagesfrom ];
		foreach ( $prefixes as $prefix ) {
			if ( strpos( $url, $prefix ) === 0 ) {
				$prefixMatched = true;
				break;
			}
		}
	}

	if ( ( $this->mOptions->getAllowExternalImages() || $prefixMatched )
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		# Image found
		$text = Linker::makeExternalImage( $url );
	}
	if ( $text ) {
		return $text;
	}

	if ( !$this->mOptions->getEnableImageWhitelist()
		|| !preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		return $text;
	}

	$whitelist = explode(
		"\n",
		wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
	);
	foreach ( $whitelist as $entry ) {
		# Blank entries and comment lines are ignored
		if ( strpos( $entry, '#' ) === 0 || $entry === '' ) {
			continue;
		}
		# The entry is a regex fragment: escape the delimiter and match
		# case-insensitively
		// @phan-suppress-next-line SecurityCheck-ReDoS preg_quote is not wanted here
		if ( preg_match( '/' . str_replace( '/', '\\/', $entry ) . '/i', $url ) ) {
			# Image matches a whitelist entry
			$text = Linker::makeExternalImage( $url );
			break;
		}
	}
	return $text;
}
2465
/**
 * Process [[ ]] wikilinks in the text.
 *
 * handleInternalLinks2() rewrites $text in place and returns the link
 * holders it created, which are merged into this parser's holders.
 *
 * @param string $text
 * @return string The text as rewritten by handleInternalLinks2()
 */
private function handleInternalLinks( $text ) {
	$linkHolders = $this->mLinkHolders;
	$linkHolders->merge( $this->handleInternalLinks2( $text ) );
	return $text;
}
2477
2483 private function handleInternalLinks2( &$s ) {
2484 static $tc = false, $e1, $e1_img;
2485 # the % is needed to support urlencoded titles as well
2486 if ( !$tc ) {
2487 $tc = Title::legalChars() . '#%';
2488 # Match a link having the form [[namespace:link|alternate]]trail
2489 $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
2490 # Match cases where there is no "]]", which might still be images
2491 $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
2492 }
2493
2494 $holders = new LinkHolderArray(
2495 $this,
2496 $this->getContentLanguageConverter(),
2497 $this->getHookContainer() );
2498
2499 # split the entire text string on occurrences of [[
2500 $a = StringUtils::explode( '[[', ' ' . $s );
2501 # get the first element (all text up to first [[), and remove the space we added
2502 $s = $a->current();
2503 $a->next();
2504 $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
2505 $s = substr( $s, 1 );
2506
2507 $nottalk = !$this->getTitle()->isTalkPage();
2508
2509 $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
2510 $e2 = null;
2511 if ( $useLinkPrefixExtension ) {
2512 # Match the end of a line for a word that's not followed by whitespace,
2513 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
2514 $charset = $this->contLang->linkPrefixCharset();
2515 $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
2516 $m = [];
2517 if ( preg_match( $e2, $s, $m ) ) {
2518 $first_prefix = $m[2];
2519 } else {
2520 $first_prefix = false;
2521 }
2522 $prefix = false;
2523 } else {
2524 $first_prefix = false;
2525 $prefix = '';
2526 }
2527
2528 # Some namespaces don't allow subpages
2529 $useSubpages = $this->nsInfo->hasSubpages(
2530 $this->getTitle()->getNamespace()
2531 );
2532
2533 # Loop for each link
2534 for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
2535 # Check for excessive memory usage
2536 if ( $holders->isBig() ) {
2537 # Too big
2538 # Do the existence check, replace the link holders and clear the array
2539 $holders->replace( $s );
2540 $holders->clear();
2541 }
2542
2543 if ( $useLinkPrefixExtension ) {
2544 // @phan-suppress-next-line PhanTypeMismatchArgumentNullableInternal $e2 is set under this condition
2545 if ( preg_match( $e2, $s, $m ) ) {
2546 [ , $s, $prefix ] = $m;
2547 } else {
2548 $prefix = '';
2549 }
2550 # first link
2551 if ( $first_prefix ) {
2552 $prefix = $first_prefix;
2553 $first_prefix = false;
2554 }
2555 }
2556
2557 $might_be_img = false;
2558
2559 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
2560 $text = $m[2];
2561 # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
2562 # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
2563 # the real problem is with the $e1 regex
2564 # See T1500.
2565 # Still some problems for cases where the ] is meant to be outside punctuation,
2566 # and no image is in sight. See T4095.
2567 if ( $text !== ''
2568 && substr( $m[3], 0, 1 ) === ']'
2569 && strpos( $text, '[' ) !== false
2570 ) {
2571 $text .= ']'; # so that handleExternalLinks($text) works later
2572 $m[3] = substr( $m[3], 1 );
2573 }
2574 # fix up urlencoded title texts
2575 if ( strpos( $m[1], '%' ) !== false ) {
2576 # Should anchors '#' also be rejected?
2577 $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2578 }
2579 $trail = $m[3];
2580 } elseif ( preg_match( $e1_img, $line, $m ) ) {
2581 # Invalid, but might be an image with a link in its caption
2582 $might_be_img = true;
2583 $text = $m[2];
2584 if ( strpos( $m[1], '%' ) !== false ) {
2585 $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2586 }
2587 $trail = "";
2588 } else { # Invalid form; output directly
2589 $s .= $prefix . '[[' . $line;
2590 continue;
2591 }
2592
2593 // @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset preg_match success when reached here
2594 $origLink = ltrim( $m[1], ' ' );
2595
2596 # Don't allow internal links to pages containing
2597 # PROTO: where PROTO is a valid URL protocol; these
2598 # should be external links.
2599 if ( preg_match( '/^(?i:' . $this->urlUtils->validProtocols() . ')/', $origLink ) ) {
2600 $s .= $prefix . '[[' . $line;
2601 continue;
2602 }
2603
2604 # Make subpage if necessary
2605 if ( $useSubpages ) {
2606 $link = Linker::normalizeSubpageLink(
2607 $this->getTitle(), $origLink, $text
2608 );
2609 } else {
2610 $link = $origLink;
2611 }
2612
2613 // \x7f isn't a default legal title char, so most likely strip
2614 // markers will force us into the "invalid form" path above. But,
2615 // just in case, let's assert that xmlish tags aren't valid in
2616 // the title position.
2617 $unstrip = $this->mStripState->killMarkers( $link );
2618 $noMarkers = ( $unstrip === $link );
2619
2620 $nt = $noMarkers ? Title::newFromText( $link ) : null;
2621 if ( $nt === null ) {
2622 $s .= $prefix . '[[' . $line;
2623 continue;
2624 }
2625
2626 $ns = $nt->getNamespace();
2627 $iw = $nt->getInterwiki();
2628
2629 $noforce = ( substr( $origLink, 0, 1 ) !== ':' );
2630
2631 if ( $might_be_img ) { # if this is actually an invalid link
2632 if ( $ns === NS_FILE && $noforce ) { # but might be an image
2633 $found = false;
2634 while ( true ) {
2635 # look at the next 'line' to see if we can close it there
2636 $a->next();
2637 $next_line = $a->current();
2638 if ( $next_line === false || $next_line === null ) {
2639 break;
2640 }
2641 $m = explode( ']]', $next_line, 3 );
2642 if ( count( $m ) == 3 ) {
2643 # the first ]] closes the inner link, the second the image
2644 $found = true;
2645 $text .= "[[{$m[0]}]]{$m[1]}";
2646 $trail = $m[2];
2647 break;
2648 } elseif ( count( $m ) == 2 ) {
2649 # if there's exactly one ]] that's fine, we'll keep looking
2650 $text .= "[[{$m[0]}]]{$m[1]}";
2651 } else {
2652 # if $next_line is invalid too, we need look no further
2653 $text .= '[[' . $next_line;
2654 break;
2655 }
2656 }
2657 if ( !$found ) {
2658 # we couldn't find the end of this imageLink, so output it raw
2659 # but don't ignore what might be perfectly normal links in the text we've examined
2660 $holders->merge( $this->handleInternalLinks2( $text ) );
2661 $s .= "{$prefix}[[$link|$text";
2662 # note: no $trail, because without an end, there *is* no trail
2663 continue;
2664 }
2665 } else { # it's not an image, so output it raw
2666 $s .= "{$prefix}[[$link|$text";
2667 # note: no $trail, because without an end, there *is* no trail
2668 continue;
2669 }
2670 }
2671
2672 $wasblank = ( $text == '' );
2673 if ( $wasblank ) {
2674 $text = $link;
2675 if ( !$noforce ) {
2676 # Strip off leading ':'
2677 $text = substr( $text, 1 );
2678 }
2679 } else {
2680 # T6598 madness. Handle the quotes only if they come from the alternate part
2681 # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
2682 # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
2683 # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
2684 $text = $this->doQuotes( $text );
2685 }
2686
2687 # Link not escaped by : , create the various objects
2688 if ( $noforce && !$nt->wasLocalInterwiki() ) {
2689 # Interwikis
2690 if (
2691 $iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
2692 $this->languageNameUtils->getLanguageName(
2693 $iw,
2694 LanguageNameUtils::AUTONYMS,
2695 LanguageNameUtils::DEFINED
2696 )
2697 || in_array( $iw, $this->svcOptions->get( MainConfigNames::ExtraInterlanguageLinkPrefixes ) )
2698 )
2699 ) {
2700 # T26502: duplicates are resolved in ParserOutput
2701 $this->mOutput->addLanguageLink( $nt );
2702
2707 $s = preg_replace( '/\n\s*$/', '', $s . $prefix ) . $trail;
2708 continue;
2709 }
2710
2711 if ( $ns === NS_FILE ) {
2712 if ( $wasblank ) {
2713 # if no parameters were passed, $text
2714 # becomes something like "File:Foo.png",
2715 # which we don't want to pass on to the
2716 # image generator
2717 $text = '';
2718 } else {
2719 # recursively parse links inside the image caption
2720 # actually, this will parse them in any other parameters, too,
2721 # but it might be hard to fix that, and it doesn't matter ATM
2722 $text = $this->handleExternalLinks( $text );
2723 $holders->merge( $this->handleInternalLinks2( $text ) );
2724 }
2725 # cloak any absolute URLs inside the image markup, so handleExternalLinks() won't touch them
2726 $s .= $prefix . $this->armorLinks(
2727 $this->makeImage( $nt, $text, $holders ) ) . $trail;
2728 continue;
2729 } elseif ( $ns === NS_CATEGORY ) {
2730 # Strip newlines from the left hand context of Category
2731 # links.
2732 # See T2087, T87753, T174639, T359886
2733 $s = preg_replace( '/\n\s*$/', '', $s . $prefix ) . $trail;
2734
2735 $sortkey = ''; // filled in by CategoryLinksTable
2736 if ( !$wasblank ) {
2737 $sortkey = $text;
2738 }
2739 $this->mOutput->addCategory( $nt, $sortkey );
2740
2741 continue;
2742 }
2743 }
2744
2745 # Self-link checking. For some languages, variants of the title are checked in
2746 # LinkHolderArray::doVariants() to allow batching the existence checks necessary
2747 # for linking to a different variant.
2748 if ( $ns !== NS_SPECIAL && $nt->equals( $this->getTitle() ) ) {
2749 $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail, '',
2750 Sanitizer::escapeIdForLink( $nt->getFragment() ) );
2751 continue;
2752 }
2753
2754 # NS_MEDIA is a pseudo-namespace for linking directly to a file
2755 # @todo FIXME: Should do batch file existence checks, see comment below
2756 if ( $ns === NS_MEDIA ) {
2757 # Give extensions a chance to select the file revision for us
2758 $options = [];
2759 $descQuery = false;
2760 $this->hookRunner->onBeforeParserFetchFileAndTitle(
2761 // @phan-suppress-next-line PhanTypeMismatchArgument Type mismatch on pass-by-ref args
2762 $this, $nt, $options, $descQuery
2763 );
2764 # Fetch and register the file (file title may be different via hooks)
2765 [ $file, $nt ] = $this->fetchFileAndTitle( $nt, $options );
2766 # Cloak with NOPARSE to avoid replacement in handleExternalLinks
2767 $s .= $prefix . $this->armorLinks(
2768 Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
2769 continue;
2770 }
2771
2772 # Some titles, such as valid special pages or files in foreign repos, should
2773 # be shown as bluelinks even though they're not included in the page table
2774 # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
2775 # batch file existence checks for NS_FILE and NS_MEDIA
2776 if ( $iw == '' && $nt->isAlwaysKnown() ) {
2777 $this->mOutput->addLink( $nt );
2778 $s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
2779 } else {
2780 # Links will be added to the output link list after checking
2781 $s .= $holders->makeHolder( $nt, $text, $trail, $prefix );
2782 }
2783 }
2784 return $holders;
2785 }
2786
/**
 * Render a link through LinkRenderer::makeKnownLink() and armor any URL
 * protocols in the resulting HTML so later passes leave them alone.
 *
 * The link label is built from $prefix, $text and the "inside" part of
 * $trail (as split by Linker::splitTrail()), and is handed to the renderer
 * as pre-escaped HTML. When $text is empty, the escaped prefixed title text
 * is used as the label instead.
 *
 * @param LinkTarget $nt Link target
 * @param string $text Label HTML; defaults to the prefixed title text
 * @param string $trail Text following the link; split into link trail and remainder
 * @param string $prefix Text placed before the label, inside the link
 * @return string Armored link HTML followed by the unused remainder of $trail
 */
private function makeKnownLinkHolder( LinkTarget $nt, $text = '', $trail = '', $prefix = '' ) {
	[ $linkTrail, $remainder ] = Linker::splitTrail( $trail );

	# Fall back to the (escaped) full title when no label was supplied
	$label = $text == ''
		? htmlspecialchars( $this->titleFormatter->getPrefixedText( $nt ) )
		: $text;

	$renderer = $this->getLinkRenderer();
	$anchor = $renderer->makeKnownLink(
		$nt,
		new HtmlArmor( "$prefix$label$linkTrail" )
	);

	return $this->armorLinks( $anchor ) . $remainder;
}
2813
/**
 * Prefix every URL protocol found at a word boundary in $text with
 * MARKER_PREFIX . "NOPARSE".
 *
 * Callers in this class use this to cloak absolute URLs so that
 * handleExternalLinks() will not touch them.
 *
 * @param string $text
 * @return string Result of the preg_replace() call
 */
private function armorLinks( $text ) {
	$protocols = $this->urlUtils->validProtocols();
	$pattern = '/\b((?i)' . $protocols . ')/';
	$replacement = self::MARKER_PREFIX . "NOPARSE$1";

	return preg_replace( $pattern, $replacement, $text );
}
2828
/**
 * Deprecated pass-through to BlockLevelPass::doBlockLevels().
 *
 * @deprecated since 1.35 (emits a deprecation notice on every call)
 * @param string $text Forwarded unchanged
 * @param bool $linestart Forwarded unchanged
 * @return string Whatever BlockLevelPass::doBlockLevels() returns
 */
public function doBlockLevels( $text, $linestart ) {
	wfDeprecated( __METHOD__, '1.35' );

	$processed = BlockLevelPass::doBlockLevels( $text, $linestart );

	return $processed;
}
2842
/**
 * Return the value of a magic variable, caching it in $this->mVarCache.
 *
 * Core magic variables are resolved by CoreMagicVariables::expand(); when
 * that yields null, the ParserGetVariableValueSwitch hook is asked instead.
 * The timestamp used for expansion comes from the parser options and may be
 * overridden by the ParserGetVariableValueTs hook.
 *
 * @param string $index Identifier of the magic variable; also the cache key
 * @param PPFrame|false $frame Passed through to the ParserGetVariableValueSwitch hook
 * @return string|null Value from the cache, the core expander, or the hook
 */
private function expandMagicVariable( $index, $frame = false ) {
	# Serve repeated lookups from the per-parse cache
	$cached = $this->mVarCache[$index] ?? null;
	if ( $cached !== null ) {
		return $cached;
	}

	$timestamp = new MWTimestamp( $this->mOptions->getTimestamp() /* TS_MW */ );
	if ( $this->hookContainer->isRegistered( 'ParserGetVariableValueTs' ) ) {
		# Let the hook adjust the time (as a UNIX timestamp), then rebuild the object
		$unixTime = $timestamp->getTimestamp( TS_UNIX );
		$this->hookRunner->onParserGetVariableValueTs( $this, $unixTime );
		$timestamp = new MWTimestamp( $unixTime );
	}

	$expanded = CoreMagicVariables::expand(
		$this, $index, $timestamp, $this->svcOptions, $this->logger
	);

	if ( $expanded === null ) {
		// Not a defined core magic word.
		// The hook gets a throwaway array so it has no unrestricted access to mVarCache.
		$scratchCache = [];
		$this->hookRunner->onParserGetVariableValueSwitch(
			// @phan-suppress-next-line PhanTypeMismatchArgument $expanded is passed as null but returned as string
			$this, $scratchCache, $index, $expanded, $frame
		);
		// The value returned by the hook is cached by falling through below.
		// Assert that the hook returned a non-null value for this magic variable.
		'@phan-var string $expanded';
	}

	$this->mVarCache[$index] = $expanded;

	return $expanded;
}
2888
/**
 * Populate $this->mVariables and $this->mSubstWords from the magic word factory.
 *
 * mVariables is built from the factory's variable IDs; mSubstWords is the
 * factory's subst array. braceSubstitution() matches against both.
 */
private function initializeVariables() {
	$factory = $this->magicWordFactory;

	$this->mVariables = $factory->newArray( $factory->getVariableIDs() );
	$this->mSubstWords = $factory->getSubstArray();
}
2899
/**
 * Run the preprocessor over $text and return the resulting node tree.
 *
 * @param string $text Wikitext to preprocess
 * @param int $flags Preprocessor flags, e.g. Preprocessor::DOM_FOR_INCLUSION
 * @return PPNode Result of Preprocessor::preprocessToObj()
 */
public function preprocessToDom( $text, $flags = 0 ) {
	$preprocessor = $this->getPreprocessor();

	return $preprocessor->preprocessToObj( $text, $flags );
}
2921
/**
 * Preprocess $text and expand it in the given frame.
 *
 * Empty text, and text longer than the configured maximum include size,
 * is returned unchanged.
 *
 * @param string $text Wikitext to expand
 * @param PPFrame|array|false $frame Frame to expand in. False creates a new
 *   top-level frame. Any other non-PPFrame value is deprecated since 1.43
 *   and is wrapped via Preprocessor::newCustomFrame().
 * @param bool $argsOnly When true, expand with PPFrame::NO_TEMPLATES
 * @return string Expanded text
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	# Is there any text? Also, prevent too big inclusions!
	$textSize = strlen( $text );
	if ( $textSize < 1 || $textSize > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	if ( $frame === false ) {
		$frame = $this->getPreprocessor()->newFrame();
	} elseif ( !( $frame instanceof PPFrame ) ) {
		# Legacy calling convention: plain parameters instead of a frame object
		wfDeprecated(
			__METHOD__ . " called using plain parameters instead of " .
			"a PPFrame instance. Creating custom frame.",
			'1.43'
		);
		$frame = $this->getPreprocessor()->newCustomFrame( $frame );
	}

	$dom = $this->preprocessToDom( $text );
	$flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;

	return $frame->expand( $dom, $flags );
}
2967
/**
 * Record that a parser limitation was hit: add a warning message and a
 * tracking category to the output.
 *
 * The message key is "$limitationType-warning" and the tracking category
 * key is "$limitationType-category".
 *
 * @param string $limitationType Base name used to build both keys
 * @param string|int $current Current value, passed as a numeric message parameter
 * @param string|int $max Maximum allowed value, passed as a numeric message parameter
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	# It does no harm if $current and $max are present but unnecessary for the message.
	# Not using ->inLanguage( $this->mOptions->getUserLangObj() ): this is shown only
	# during preview, and that would split the parser cache unnecessarily.
	$warningKey = $limitationType . '-warning';
	$categoryKey = $limitationType . '-category';

	$this->mOutput->addWarningMsg(
		$warningKey,
		Message::numParam( $current ),
		Message::numParam( $max )
	);
	$this->addTrackingCategory( $categoryKey );
}
3006
/**
 * Expand a double-brace construct: a subst: prefix, magic variable, parser
 * function, special page inclusion, template or interwiki transclusion.
 *
 * The stages run in order and each one is skipped once $found is set:
 * subst handling, magic variables, MSG/MSGNW/RAW prefixes, parser functions,
 * title resolution (with recursion depth check), then loading the template
 * (with loop check). If nothing matches, the original wikitext is rebuilt
 * and returned as an object.
 *
 * @param array $piece Keys read here: 'title' and 'parts' (preprocessor
 *   nodes) and 'lineStart'
 * @param PPFrame $frame Frame the construct is being expanded in
 * @return array [ 'object' => ... ] when the result still needs expansion in
 *   the current frame (or nothing was found), otherwise [ 'text' => ... ]
 */
public function braceSubstitution( array $piece, PPFrame $frame ) {
	// Flags

	// $text has been filled
	$found = false;
	$text = '';
	// wiki markup in $text should be escaped
	$nowiki = false;
	// $text is HTML, armour it against wikitext transformation
	$isHTML = false;
	// Force interwiki transclusion to be done in raw mode not rendered
	$forceRawInterwiki = false;
	// $text is a DOM node needing expansion in a child frame
	$isChildObj = false;
	// $text is a DOM node needing expansion in the current frame
	$isLocalObj = false;

	# Title object, where $text came from
	$title = false;

	# $part1 is the bit before the first |, and must contain only title characters.
	# Various prefixes will be stripped from it later.
	$titleWithSpaces = $frame->expand( $piece['title'] );
	$part1 = trim( $titleWithSpaces );
	$titleText = false;

	# Original title text preserved for various purposes
	$originalTitle = $part1;

	# $args is a list of argument nodes, starting from index 0, not including $part1
	$args = $piece['parts'];

	$profileSection = null; // profile templates

	$sawDeprecatedTemplateEquals = false; // T91154

	# SUBST
	// @phan-suppress-next-line PhanImpossibleCondition
	if ( !$found ) {
		$substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );
		$part1 = trim( $part1 );

		# Possibilities for substMatch: "subst", "safesubst" or FALSE
		# Decide whether to expand template or keep wikitext as-is.
		if ( $this->ot['wiki'] ) {
			# In PST: literal with no prefix, expand with subst: or safesubst:
			$literal = ( $substMatch === false );
		} else {
			# Not in PST: literal with plain subst:, expand with safesubst: or no prefix
			$literal = ( $substMatch == 'subst' );
		}
		if ( $literal ) {
			$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
			$isLocalObj = true;
			$found = true;
		}
	}

	# Variables
	if ( !$found && $args->getLength() == 0 ) {
		$id = $this->mVariables->matchStartToEnd( $part1 );
		if ( $id !== false ) {
			if ( strpos( $part1, ':' ) !== false ) {
				wfDeprecatedMsg(
					'Registering a magic variable with a name including a colon',
					'1.39', false, false
				);
			}
			$text = $this->expandMagicVariable( $id, $frame );
			$found = true;
		}
	}

	# MSG, MSGNW and RAW
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw = $this->magicWordFactory->get( 'msgnw' );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg = $this->magicWordFactory->get( 'msg' );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check for RAW:
		$mwRaw = $this->magicWordFactory->get( 'raw' );
		if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
			$forceRawInterwiki = true;
		}
	}

	# Parser functions
	if ( !$found ) {
		$colonPos = strpos( $part1, ':' );
		if ( $colonPos !== false ) {
			$func = substr( $part1, 0, $colonPos );
			# First argument is the text after the colon; the rest are the raw part nodes
			$funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
			$argsLength = $args->getLength();
			for ( $i = 0; $i < $argsLength; $i++ ) {
				$funcArgs[] = $args->item( $i );
			}

			$result = $this->callParserFunction( $frame, $func, $funcArgs );

			// Extract any forwarded flags
			if ( isset( $result['title'] ) ) {
				$title = $result['title'];
			}
			if ( isset( $result['found'] ) ) {
				$found = $result['found'];
			}
			if ( array_key_exists( 'text', $result ) ) {
				// a string or null
				$text = $result['text'];
			}
			if ( isset( $result['nowiki'] ) ) {
				$nowiki = $result['nowiki'];
			}
			if ( isset( $result['isHTML'] ) ) {
				$isHTML = $result['isHTML'];
			}
			if ( isset( $result['forceRawInterwiki'] ) ) {
				$forceRawInterwiki = $result['forceRawInterwiki'];
			}
			if ( isset( $result['isChildObj'] ) ) {
				$isChildObj = $result['isChildObj'];
			}
			if ( isset( $result['isLocalObj'] ) ) {
				$isLocalObj = $result['isLocalObj'];
			}
		}
	}

	# Finish mangling title and then check for loops.
	# Set $title to a Title object and $titleText to the PDBK
	if ( !$found ) {
		$ns = NS_TEMPLATE;
		# Split the title into page and subpage
		$subpage = '';
		$relative = Linker::normalizeSubpageLink(
			$this->getTitle(), $part1, $subpage
		);
		if ( $part1 !== $relative ) {
			# A relative (subpage) link resolves against the current page's namespace
			$part1 = $relative;
			$ns = $this->getTitle()->getNamespace();
		}
		$title = Title::newFromText( $part1, $ns );
		if ( $title ) {
			$titleText = $title->getPrefixedText();
			# Check for language variants if the template is not found
			if ( $this->getTargetLanguageConverter()->hasVariants() && $title->getArticleID() == 0 ) {
				$this->getTargetLanguageConverter()->findVariantLink( $part1, $title, true );
			}
			# Do recursion depth check
			$limit = $this->mOptions->getMaxTemplateDepth();
			if ( $frame->depth >= $limit ) {
				$found = true;
				$text = '<span class="error">'
					. wfMessage( 'parser-template-recursion-depth-warning' )
						->numParams( $limit )->inContentLanguage()->text()
					. '</span>';
			}
		}
	}

	# Load from database
	if ( !$found && $title ) {
		$profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
		if ( !$title->isExternal() ) {
			if ( $title->isSpecialPage()
				&& $this->mOptions->getAllowSpecialInclusion()
				&& $this->ot['html']
			) {
				$specialPage = $this->specialPageFactory->getPage( $title->getDBkey() );
				// Pass the template arguments as URL parameters.
				// "uselang" will have no effect since the Language object
				// is forced to the one defined in ParserOptions.
				$pageArgs = [];
				$argsLength = $args->getLength();
				for ( $i = 0; $i < $argsLength; $i++ ) {
					$bits = $args->item( $i )->splitArg();
					# Only named arguments (empty index) are forwarded
					if ( strval( $bits['index'] ) === '' ) {
						$name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
						$value = trim( $frame->expand( $bits['value'] ) );
						$pageArgs[$name] = $value;
					}
				}

				// Create a new context to execute the special page
				$context = new RequestContext;
				$context->setTitle( $title );
				$context->setRequest( new FauxRequest( $pageArgs ) );
				if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
					$context->setUser( $this->userFactory->newFromUserIdentity( $this->getUserIdentity() ) );
				} else {
					// If this page is cached, then we better not be per user.
					$context->setUser( User::newFromName( '127.0.0.1', false ) );
				}
				$context->setLanguage( $this->mOptions->getUserLangObj() );
				$ret = $this->specialPageFactory->capturePath( $title, $context, $this->getLinkRenderer() );
				if ( $ret ) {
					$text = $context->getOutput()->getHTML();
					$this->mOutput->addOutputPageMetadata( $context->getOutput() );
					$found = true;
					$isHTML = true;
					if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
						$this->mOutput->updateRuntimeAdaptiveExpiry(
							$specialPage->maxIncludeCacheTime()
						);
					}
				}
			} elseif ( $this->nsInfo->isNonincludable( $title->getNamespace() ) ) {
				$found = false; # access denied
				$this->logger->debug(
					__METHOD__ .
					": template inclusion denied for " . $title->getPrefixedDBkey()
				);
			} else {
				[ $text, $title ] = $this->getTemplateDom( $title );
				if ( $text !== false ) {
					$found = true;
					$isChildObj = true;
					if (
						$title->getNamespace() === NS_TEMPLATE &&
						$title->getDBkey() === '=' &&
						$originalTitle === '='
					) {
						// Note that we won't get here if `=` is evaluated
						// (in the future) as a parser function, nor if
						// the Template namespace is given explicitly,
						// ie `{{Template:=}}`. Only `{{=}}` triggers.
						$sawDeprecatedTemplateEquals = true; // T91154
					}
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
				$text = "[[:$titleText]]";
				$found = true;
			}
		} elseif ( $title->isTrans() ) {
			# Interwiki transclusion
			if ( $this->ot['html'] && !$forceRawInterwiki ) {
				$text = $this->interwikiTransclude( $title, 'render' );
				$isHTML = true;
			} else {
				$text = $this->interwikiTransclude( $title, 'raw' );
				# Preprocess it like a template
				$text = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
				$isChildObj = true;
			}
			$found = true;
		}

		# Do infinite loop check
		# This has to be done after redirect resolution to avoid infinite loops via redirects
		if ( !$frame->loopCheck( $title ) ) {
			$found = true;
			$text = '<span class="error">'
				. wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
				. '</span>';
			$this->addTrackingCategory( 'template-loop-category' );
			$this->mOutput->addWarningMsg(
				'template-loop-warning',
				Message::plaintextParam( $titleText )
			);
			$this->logger->debug( __METHOD__ . ": template loop broken at '$titleText'" );
		}
	}

	# If we haven't found text to substitute by now, we're done
	# Recover the source wikitext and return it
	if ( !$found ) {
		$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
		if ( $profileSection ) {
			$this->mProfiler->scopedProfileOut( $profileSection );
		}
		return [ 'object' => $text ];
	}

	# Expand DOM-style return values in a child frame
	if ( $isChildObj ) {
		# Clean up argument array
		$newFrame = $frame->newChild( $args, $title );

		if ( $nowiki ) {
			$text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
		} elseif ( $titleText !== false && $newFrame->isEmpty() ) {
			# Expansion is eligible for the empty-frame cache
			$text = $newFrame->cachedExpand( $titleText, $text );
		} else {
			# Uncached expansion
			$text = $newFrame->expand( $text );
		}
	}
	if ( $isLocalObj && $nowiki ) {
		$text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
		$isLocalObj = false;
	}

	if ( $profileSection ) {
		$this->mProfiler->scopedProfileOut( $profileSection );
	}
	if (
		$sawDeprecatedTemplateEquals &&
		$this->mStripState->unstripBoth( $text ) !== '='
	) {
		// T91154: {{=}} is deprecated when it doesn't expand to `=`;
		// use {{Template:=}} if you must.
		$this->addTrackingCategory( 'template-equals-category' );
		$this->mOutput->addWarningMsg( 'template-equals-warning' );
	}

	# Replace raw HTML by a placeholder
	if ( $isHTML ) {
		// @phan-suppress-next-line SecurityCheck-XSS
		$text = $this->insertStripItem( $text );
	} elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
		# Escape nowiki-style return values
		// @phan-suppress-next-line SecurityCheck-DoubleEscaped
		$text = wfEscapeWikiText( $text );
	} elseif ( is_string( $text )
		&& !$piece['lineStart']
		&& preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
	) {
		# T2529: if the template begins with a table or block-level
		# element, it should be treated as beginning a new line.
		# This behavior is somewhat controversial.
		$text = "\n" . $text;
	}

	if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
		# Error, oversize inclusion
		if ( $titleText !== false ) {
			# Make a working, properly escaped link if possible (T25588)
			$text = "[[:$titleText]]";
		} else {
			# This will probably not be a working link, but at least it may
			# provide some hint of where the problem is
			$originalTitle = preg_replace( '/^:/', '', $originalTitle );
			$text = "[[:$originalTitle]]";
		}
		$text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
			. 'post-expand include size too large -->' );
		$this->limitationWarn( 'post-expand-template-inclusion' );
	}

	# Objects still need expansion by the caller; plain text does not
	if ( $isLocalObj ) {
		return [ 'object' => $text ];
	}

	return [ 'text' => $text ];
}
3383
/**
 * Look up a parser function by name and call its registered hook.
 *
 * The name is first tried against the case-sensitive synonyms, then
 * (lower-cased with the content language) against the case-insensitive ones.
 *
 * @param PPFrame $frame Frame used to expand node arguments, and passed to
 *   hooks registered with SFH_OBJECT_ARGS
 * @param string $function Function name as written in the wikitext
 * @param array $args Arguments; values may be PPNode objects or plain strings
 * @return array Always has a 'found' key. When the function is unknown this
 *   is just [ 'found' => false ]; otherwise it holds the hook's flags and
 *   'text'. If the hook set 'noparse' to a false value, 'text' is the
 *   preprocessed DOM and 'isChildObj' is true.
 */
public function callParserFunction( PPFrame $frame, $function, array $args = [] ) {
	$synonyms = $this->mFunctionSynonyms;

	if ( isset( $synonyms[1][$function] ) ) {
		# Case sensitive functions
		$function = $synonyms[1][$function];
	} else {
		# Case insensitive functions
		$function = $this->contLang->lc( $function );
		if ( !isset( $synonyms[0][$function] ) ) {
			return [ 'found' => false ];
		}
		$function = $synonyms[0][$function];
	}

	[ $callback, $flags ] = $this->mFunctionHooks[$function];

	$callArgs = [ $this ];
	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Hook wants ( $parser, $frame, $nodes ): convert arguments to PPNodes.
		# The first argument (key 0) and existing nodes are passed through as-is.
		$nodeArgs = [];
		foreach ( $args as $key => $arg ) {
			$nodeArgs[] = ( $arg instanceof PPNode || $key === 0 )
				? $arg
				: $this->mPreprocessor->newPartNodeArray( [ $key => $arg ] )->item( 0 );
		}

		$callArgs[] = $frame;
		$callArgs[] = $nodeArgs;
	} else {
		# Hook wants ( $parser, $arg1, $arg2, ... ): convert arguments to trimmed plain text
		foreach ( $args as $key => $arg ) {
			if ( $arg instanceof PPNode ) {
				$plain = $frame->expand( $arg );
			} elseif ( is_int( $key ) && $key >= 0 ) {
				$plain = $arg;
			} else {
				$plain = "$key=$arg";
			}
			$callArgs[] = trim( $plain );
		}
	}

	$result = $callback( ...$callArgs );

	# The interface for function hooks allows them to return a wikitext
	# string or an array containing the string and any flags. Normalise
	# both shapes into the array this method returns.
	if ( is_array( $result ) ) {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		$result += [
			'found' => true,
		];
	} else {
		$result = [
			'found' => true,
			'text' => $result,
		];
	}

	$noparse = $result['noparse'] ?? true;
	$preprocessFlags = $result['preprocessFlags'] ?? 0;

	if ( !$noparse ) {
		# The hook asked for its output to be parsed: hand back a DOM for child-frame expansion
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3482
/**
 * Get the preprocessed DOM for a template, using two per-parser caches:
 * mTplRedirCache (requested page => final page) and mTplDomCache
 * (page => DOM, or false when no text could be fetched).
 *
 * @param LinkTarget $title Template to load
 * @return array Two elements: the DOM (or false when no text was found),
 *   and the final title after any redirect or hook substitution
 */
public function getTemplateDom( LinkTarget $title ) {
	$requested = $title;
	$key = CacheKeyHelper::getKeyForPage( $title );

	# Follow a previously recorded redirect before consulting the DOM cache
	$redirect = $this->mTplRedirCache[$key] ?? null;
	if ( $redirect !== null ) {
		[ $ns, $dbk ] = $redirect;
		$title = Title::makeTitle( $ns, $dbk );
		$key = CacheKeyHelper::getKeyForPage( $title );
	}
	if ( isset( $this->mTplDomCache[$key] ) ) {
		return [ $this->mTplDomCache[$key], $title ];
	}

	# Cache miss, go to the database
	[ $wikitext, $title ] = $this->fetchTemplateAndTitle( $title );

	if ( $wikitext === false ) {
		# Remember the failure so the fetch is not repeated
		$this->mTplDomCache[$key] = false;
		return [ false, $title ];
	}

	$dom = $this->preprocessToDom( $wikitext, Preprocessor::DOM_FOR_INCLUSION );
	$this->mTplDomCache[$key] = $dom;

	if ( !$title->isSamePageAs( $requested ) ) {
		# The fetch ended on a different page: record the mapping for next time
		$requestedKey = CacheKeyHelper::getKeyForPage( $requested );
		$this->mTplRedirCache[$requestedKey] = [ $title->getNamespace(), $title->getDBkey() ];
	}

	return [ $dom, $title ];
}
3523
/**
 * Fetch the current revision of a page through the callback configured in
 * the parser options, caching the result per page in an LRU map of up to
 * 100 entries.
 *
 * NOTE(review): the signature line was missing from this listing. The name
 * and parameter are reconstructed from the call in statelessFetchTemplate()
 * and from the body; public visibility is assumed — confirm.
 *
 * @param LinkTarget $link Page to look up
 * @return RevisionRecord|null The callback's result, with false normalised to null
 */
public function fetchCurrentRevisionRecordOfTitle( LinkTarget $link ) {
	$cacheKey = CacheKeyHelper::getKeyForPage( $link );
	if ( !$this->currentRevisionCache ) {
		$this->currentRevisionCache = new MapCacheLRU( 100 );
	}
	if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
		$title = Title::newFromLinkTarget( $link ); // hook signature compat
		$revisionRecord =
			// Defaults to Parser::statelessFetchRevisionRecord()
			call_user_func(
				$this->mOptions->getCurrentRevisionRecordCallback(),
				$title,
				$this
			);
		if ( $revisionRecord === false ) {
			// Parser::statelessFetchRevisionRecord() can return false;
			// normalize it to null.
			$revisionRecord = null;
		}
		$this->currentRevisionCache->set( $cacheKey, $revisionRecord );
	}
	return $this->currentRevisionCache->get( $cacheKey );
}
3560
/**
 * Tell whether the current-revision cache already holds an entry for a page.
 *
 * NOTE(review): the signature line was missing from this listing. The method
 * name, visibility and parameter type are reconstructed — confirm against
 * the upstream file before merging.
 *
 * @param LinkTarget $link Page to check
 * @return bool True when the cache exists and has an entry for the page;
 *   otherwise a falsy value (null if the cache was never created)
 */
public function isCurrentRevisionOfTitleCached( LinkTarget $link ) {
	$key = CacheKeyHelper::getKeyForPage( $link );
	return (
		$this->currentRevisionCache &&
		$this->currentRevisionCache->has( $key )
	);
}
3574
/**
 * Look up the known current revision of a page via the global
 * RevisionLookup service, without touching any parser state.
 *
 * @param LinkTarget $link Page to look up
 * @param Parser|null $parser Unused here; part of the callback signature
 * @return RevisionRecord|false|null Whatever getKnownCurrentRevision() returns
 */
public static function statelessFetchRevisionRecord( LinkTarget $link, $parser = null ) {
	// A PageIdentity (probably a Title) can be used directly.
	// XXX: otherwise use RevisionStore::getPageForLink()!
	// ...but get the info for the current revision at the same time?
	// Should RevisionStore::getKnownCurrentRevision accept a LinkTarget?
	$page = $link instanceof PageIdentity
		? $link
		: Title::newFromLinkTarget( $link );

	$lookup = MediaWikiServices::getInstance()->getRevisionLookup();

	return $lookup->getKnownCurrentRevision( $page );
}
3599
/**
 * Fetch the wikitext of a template through the template callback from the
 * parser options, and register every dependency it reports on the output.
 *
 * @param LinkTarget $link Template to fetch
 * @return array Two elements: the text (string with U+007F replaced by "?",
 *   or the callback's non-string value unchanged), and the final title
 *   (the callback's 'finalTitle', defaulting to the requested title)
 */
public function fetchTemplateAndTitle( LinkTarget $link ) {
	// Use Title for compatibility with callbacks and return type
	$title = Title::newFromLinkTarget( $link );

	// Defaults to Parser::statelessFetchTemplate()
	$fetch = $this->mOptions->getTemplateCallback();
	$fetched = $fetch( $title, $this );
	$revRecord = $fetched['revision-record'] ?? null;

	$text = $fetched['text'];
	if ( is_string( $text ) ) {
		// We use U+007F DELETE to distinguish strip markers from regular text
		$text = strtr( $text, "\x7f", "?" );
	}
	$finalTitle = $fetched['finalTitle'] ?? $title;

	$deps = $fetched['deps'] ?? [];
	foreach ( $deps as $dep ) {
		$this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );

		$isSelfTransclusion = $dep['title']->equals( $this->getTitle() )
			&& $revRecord instanceof RevisionRecord;
		if ( !$isSelfTransclusion ) {
			continue;
		}

		// Self-transclusion; final result may change based on the new page version
		try {
			$sha1 = $revRecord->getSha1();
		} catch ( RevisionAccessException $e ) {
			$sha1 = null;
		}
		$this->setOutputFlag( ParserOutputFlags::VARY_REVISION_SHA1, 'Self transclusion' );
		$this->getOutput()->setRevisionUsedSha1Base36( $sha1 );
	}

	return [ $text, $finalTitle ];
}
3637
/**
 * Fetch the wikitext of a template, following up to two redirects, and
 * collect the pages the result depends on.
 *
 * Note that the title (including redirect targets) could be external; hooks
 * get a chance to redirect the external title to a local one (which might
 * be useful), but external titles are never added to the dependency list.
 * (T362221)
 *
 * @param LinkTarget $page Template to fetch
 * @param Parser|false $parser When given, supplies the context title and
 *   its cached current-revision lookup
 * @return array With keys:
 *   - 'revision-record': the last RevisionRecord looked at, or false
 *   - 'text': wikitext string, or false when nothing could be loaded
 *   - 'finalTitle': title the text was finally taken from
 *   - 'deps': list of [ 'title', 'page_id', 'rev_id' ] dependency rows
 */
public static function statelessFetchTemplate( $page, $parser = false ) {
	$title = Title::castFromLinkTarget( $page ); // for compatibility with return type
	$text = $skip = false;
	$finalTitle = $title;
	$deps = [];
	$revRecord = null;
	$contextTitle = $parser ? $parser->getTitle() : null;

	$services = MediaWikiServices::getInstance();
	$revLookup = $services->getRevisionLookup();
	$hookRunner = new HookRunner( $services->getHookContainer() );

	// Dependency row for a page recorded without a specific revision
	$revisionlessDep = static function ( $target ) {
		return [
			'title' => $target,
			'page_id' => $target->getArticleID(),
			'rev_id' => null,
		];
	};

	# Loop to fetch the article, with up to 2 redirects
	for ( $hop = 0; $hop < 3 && is_object( $title ); $hop++ ) {
		# Give extensions a chance to select the revision instead
		$revRecord = null; # Assume no hook
		$origTitle = $title;
		$titleChanged = false;
		$hookRunner->onBeforeParserFetchTemplateRevisionRecord(
			# The $title is not a PageIdentity, as it may
			# contain fragments or even represent an attempt to transclude
			# a broken or otherwise-missing Title, which the hook may
			# fix up. Similarly, the $contextTitle may represent a special
			# page or other page which "exists" as a parsing context but
			# is not in the DB.
			$contextTitle, $title,
			$skip, $revRecord
		);

		if ( $skip ) {
			$text = false;
			if ( !$title->isExternal() ) {
				$deps[] = $revisionlessDep( $title );
			}
			break;
		}

		# Get the revision
		if ( !$revRecord ) {
			$revRecord = $parser
				? $parser->fetchCurrentRevisionRecordOfTitle( $title )
				: $revLookup->getRevisionByTitle( $title );
		}

		if ( $revRecord ) {
			# Update title, as $revRecord may have been changed by hook
			$title = Title::newFromLinkTarget(
				$revRecord->getPageAsLinkTarget()
			);
			// Assuming title is not external if we've got a $revRecord
			$deps[] = [
				'title' => $title,
				'page_id' => $revRecord->getPageId(),
				'rev_id' => $revRecord->getId(),
			];
		} elseif ( !$title->isExternal() ) {
			$deps[] = $revisionlessDep( $title );
		}

		if ( !$title->equals( $origTitle ) ) {
			# If we fetched a rev from a different title, register
			# the original title too...
			if ( !$origTitle->isExternal() ) {
				$deps[] = $revisionlessDep( $origTitle );
			}
			$titleChanged = true;
		}

		# If there is no current revision, there is no page
		if ( $revRecord === null || $revRecord->getId() === null ) {
			$services->getLinkCache()->addBadLinkObj( $title );
		}

		if ( !$revRecord ) {
			# No revision at all: only MediaWiki-namespace messages can still supply text
			if ( $title->getNamespace() === NS_MEDIAWIKI ) {
				$message = wfMessage( $services->getContentLanguage()->
					lcfirst( $title->getText() ) )->inContentLanguage();
				$text = $message->exists() ? $message->plain() : false;
			}
			break;
		}

		if ( !$revRecord->hasSlot( SlotRecord::MAIN ) ) { // T276476
			$text = false;
			if ( $titleChanged ) {
				// We've added this (missing) title to the dependencies;
				// give the hook another chance to redirect it to an
				// actual page.
				$finalTitle = $title;
				continue;
			}
			break;
		}

		$content = $revRecord->getContent( SlotRecord::MAIN );
		$text = $content ? $content->getWikitextForTransclusion() : null;
		if ( $text === false || $text === null ) {
			# No usable content (this also covers a missing content object)
			$text = false;
			break;
		}

		# Redirect? A non-object target ends the loop via the loop condition.
		$finalTitle = $title;
		$title = $content->getRedirectTarget();
	}

	return [
		// previously, when this also returned a Revision object, we set
		// 'revision-record' to false instead of null if it was unavailable,
		// so that callers could use isset and then rely on the revision-record
		// key instead of the revision key, even if there was no corresponding
		// object - we continue to set to false here for backwards compatibility
		'revision-record' => $revRecord ?: false,
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps
	];
}
3791
/**
 * Fetch a file and register it as an image dependency of the output.
 *
 * When the fetched file's own title differs from the requested link, that
 * title is registered as well and is the one returned.
 *
 * @param LinkTarget $link Requested file
 * @param array $options Passed through to fetchFileNoRegister()
 * @return array Two elements: the file (or a falsy value when none was
 *   found), and a Title for the final link target
 */
public function fetchFileAndTitle( LinkTarget $link, array $options = [] ) {
	$file = $this->fetchFileNoRegister( $link, $options );

	if ( $file ) {
		$time = $file->getTimestamp();
		$sha1 = $file->getSha1();
	} else {
		$time = false;
		$sha1 = false;
	}

	# Register the file as a dependency...
	$this->mOutput->addImage( $link, $time, $sha1 );

	if ( $file && !$link->isSameLinkAs( $file->getTitle() ) ) {
		# Update fetched file title after resolving redirects, etc.
		$link = $file->getTitle();
		$this->mOutput->addImage( $link, $time, $sha1 );
	}

	// Title for return type compat
	return [ $file, Title::newFromLinkTarget( $link ) ];
}
3816
/**
 * Look up a file in the repo group without registering it on the output.
 *
 * @param LinkTarget $link File to find when no 'sha1' option is given
 * @param array $options Keys read here: 'broken' (if set, no lookup is
 *   done) and 'sha1' (if set, look up by key instead of by name). The whole
 *   array is also passed on to the repo group.
 * @return File|false Result of the repo lookup; false when 'broken' is set
 */
protected function fetchFileNoRegister( LinkTarget $link, array $options = [] ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		return false;
	}

	$repoGroup = MediaWikiServices::getInstance()->getRepoGroup();

	if ( isset( $options['sha1'] ) ) {
		// get by (sha1,timestamp)
		return $repoGroup->findFileFromKey( $options['sha1'], $options );
	}

	// get by (name,timestamp)
	return $repoGroup->findFile( $link, $options );
}
3840
/**
 * Fetch the content of a page on another wiki over HTTP ("scary
 * transclusion"), going through the WAN cache.
 *
 * Returns a localised error message instead of content when the feature is
 * disabled, the URL is longer than 1024 bytes, or the request failed.
 *
 * @param LinkTarget $link Interwiki page to fetch
 * @param string $action Value of the "action" URL parameter; callers in
 *   this class pass 'render' or 'raw'
 * @return string Fetched content, or an error message in the content language
 */
public function interwikiTransclude( LinkTarget $link, $action ) {
	if ( !$this->svcOptions->get( MainConfigNames::EnableScaryTranscluding ) ) {
		return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
	}

	// TODO: extract relevant functionality from Title
	$title = Title::newFromLinkTarget( $link );

	$url = $title->getFullURL( [ 'action' => $action ] );
	if ( strlen( $url ) > 1024 ) {
		return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
	}

	$wikiId = $title->getTransWikiID(); // remote wiki ID or false
	$hasWikiId = ( $wikiId !== false );

	$caller = __METHOD__;
	$cache = $this->wanCache;

	$cacheKey = $cache->makeGlobalKey(
		'interwiki-transclude',
		$hasWikiId ? $wikiId : 'external',
		sha1( $url )
	);
	$cacheTtl = $this->svcOptions->get( MainConfigNames::TranscludeCacheExpiry );

	// Performs the HTTP request on a cache miss; failed requests are not cached
	$regenerate = function ( $oldValue, &$ttl ) use ( $url, $caller, $cache ) {
		$req = $this->httpRequestFactory->create( $url, [], $caller );

		$status = $req->execute(); // Status object
		$ok = $status->isOK();
		if ( !$ok ) {
			$ttl = $cache::TTL_UNCACHEABLE;
		} elseif ( $req->getResponseHeader( 'X-Database-Lagged' ) !== null ) {
			$ttl = min( $cache::TTL_LAGGED, $ttl );
		}

		return [
			'text' => $ok ? $req->getContent() : null,
			'code' => $req->getStatus()
		];
	};

	$cacheOptions = [
		'checkKeys' => $hasWikiId
			? [ $cache->makeGlobalKey( 'interwiki-page', $wikiId, $title->getDBkey() ) ]
			: [],
		'pcGroup' => 'interwiki-transclude:5',
		'pcTTL' => $cache::TTL_PROC_LONG
	];

	$data = $cache->getWithSetCallback( $cacheKey, $cacheTtl, $regenerate, $cacheOptions );

	if ( is_string( $data['text'] ) ) {
		return $data['text'];
	}

	if ( $data['code'] != 200 ) {
		// We failed to fetch the content; report the HTTP status that came back
		return wfMessage( 'scarytranscludefailed-httpstatus' )
			->params( $url, $data['code'] )->inContentLanguage()->text();
	}

	return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
}
3911
/**
 * Resolve a triple-brace argument ({{{name|default}}}) against a frame.
 *
 * @param array $piece Uses the 'title' and 'parts' entries
 * @param PPFrame $frame Frame the argument is looked up in
 * @return array Either [ 'object' => ... ] (the default value's children, or
 *  the re-assembled literal {{{...}}} when nothing matched) or
 *  [ 'text' => ... ] (the argument value from the frame)
 */
public function argSubstitution( array $piece, PPFrame $frame ) {
	$parts = $piece['parts'];
	$nameWithSpaces = $frame->expand( $piece['title'] );
	$text = $frame->getArgument( trim( $nameWithSpaces ) );
	$object = false;

	if ( $text === false && $parts->getLength() > 0 ) {
		$defaultAllowed = $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() );
		if ( $defaultAllowed ) {
			# No match in frame, use the supplied default
			$object = $parts->item( 0 )->getChildren();
		}
	}

	$withinLimit = $this->incrementIncludeSize( 'arg', strlen( $text ) );
	if ( !$withinLimit ) {
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $text === false && $object === false ) {
		# No match anywhere
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
	}

	if ( !$withinLimit ) {
		# The warning comment is appended to the text result
		$text .= '<!-- WARNING: argument omitted, expansion size too large -->';
	}

	return $object !== false
		? [ 'object' => $object ]
		: [ 'text' => $text ];
}
3957
/**
 * Ask the Parsoid site configuration whether the given tag needs nowiki
 * stripped in the tag parser function.
 *
 * @param string $lowerTagName Tag name; by its name, expected in lower case
 * @return bool
 */
public function tagNeedsNowikiStrippedInTagPF( string $lowerTagName ): bool {
	return MediaWikiServices::getInstance()
		->getParsoidSiteConfig()
		->tagNeedsNowikiStrippedInTagPF( $lowerTagName );
}
3966
/**
 * Replace an extension tag with a strip marker (or with its literal
 * wikitext, when not generating HTML).
 *
 * @param array $params Uses 'name'; optionally 'attr', 'inner', 'close' and
 *  'attributes' (attributes passed via {{#tag:}})
 * @param PPFrame $frame
 * @param bool $processNowiki Run the tag hook for <nowiki> even when the
 *  output type is not HTML
 * @return string The strip marker, the raw hook output when the hook asks
 *  for marker type 'none', or an expansion error string passed straight up
 * @throws UnexpectedValueException If a hook returns an unknown marker type
 */
public function extensionSubstitution( array $params, PPFrame $frame, bool $processNowiki = false ) {
	static $errorStr = '<span class="error">';

	$name = $frame->expand( $params['name'] );
	if ( str_starts_with( $name, $errorStr ) ) {
		// Probably expansion depth or node count exceeded. Just punt the
		// error up.
		return $name;
	}

	// Parse attributes from XML-like wikitext syntax
	$attrText = isset( $params['attr'] ) ? $frame->expand( $params['attr'] ) : '';
	if ( str_starts_with( $attrText, $errorStr ) ) {
		// Same situation as for the name
		return $attrText;
	}

	// The content is deliberately not checked for the error prefix: it
	// could legitimately be the error string itself (T149622).
	$content = isset( $params['inner'] ) ? $frame->expand( $params['inner'] ) : null;

	$marker = sprintf(
		'%s-%s-%08X%s',
		self::MARKER_PREFIX,
		$name,
		$this->mMarkerIndex++,
		self::MARKER_SUFFIX
	);

	$normalizedName = strtolower( $name );
	$isNowiki = ( $normalizedName === 'nowiki' );
	$markerType = $isNowiki ? 'nowiki' : 'general';

	if ( $this->ot['html'] || ( $processNowiki && $isNowiki ) ) {
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		// Merge in attributes passed via {{#tag:}} parser function
		if ( isset( $params['attributes'] ) ) {
			$attributes += $params['attributes'];
		}

		if ( !isset( $this->mTagHooks[$normalizedName] ) ) {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $normalizedName ) . '</span>';
		} else {
			// $content may be null here, e.g. for a self-closed tag.
			$output = call_user_func_array(
				$this->mTagHooks[$normalizedName],
				[ $content, $attributes, $this, $frame ]
			);
		}

		if ( is_array( $output ) ) {
			// Element 0 is the output; the rest are flags
			$flags = $output;
			$output = $flags[0];
			if ( isset( $flags['markerType'] ) ) {
				$markerType = $flags['markerType'];
			}
		}
	} else {
		// We're substituting a {{subst:#tag:}} parser function.
		// Convert the attributes it passed into the XML-like string.
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$escapedValue = htmlspecialchars(
					$this->getStripState()->unstripBoth( $attrValue ),
					ENT_COMPAT
				);
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' . $escapedValue . '"';
			}
		}

		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			$close = $params['close'] === null ? '' : $frame->expand( $params['close'] );
			if ( str_starts_with( $close, $errorStr ) ) {
				// Same situation as for the name
				return $close;
			}
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	}
	if ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
		return $marker;
	}
	if ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
		return $marker;
	}

	throw new UnexpectedValueException( __METHOD__ . ': invalid marker type' );
}
4071
/**
 * Add to one of the include-size counters unless that would exceed the
 * maximum include size from the parser options.
 *
 * @param string $type Key into the include-size counters
 * @param int $size Amount to add
 * @return bool False (and the counter is left unchanged) if the limit would
 *  be exceeded, true if the counter was increased
 */
private function incrementIncludeSize( $type, $size ) {
	$newTotal = $this->mIncludeSizes[$type] + $size;
	if ( $newTotal > $this->mOptions->getMaxIncludeSize() ) {
		return false;
	}

	$this->mIncludeSizes[$type] = $newTotal;
	return true;
}
4087
4093 $this->mExpensiveFunctionCount++;
4094 return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
4095 }
4096
/**
 * Strip double-underscore magic words from the text and record their
 * effects on the parser state and the parser output.
 *
 * @param string $text
 * @return string The text with the magic words removed; the first __TOC__
 *  is replaced by the TOC placeholder
 */
private function handleDoubleUnderscore( $text ) {
	# __TOC__ is handled first because its position matters
	$tocWord = $this->magicWordFactory->get( 'toc' );
	if ( $tocWord->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# The first occurrence becomes a placeholder to be filled in
		# with the TOC at the end; any further ones are dropped.
		$text = $tocWord->replace( self::TOC_PLACEHOLDER, $text, 1 );
		$text = $tocWord->replace( '', $text );

		# Recorded as a page property like every other double-underscore
		$this->mOutput->setUnsortedPageProperty( 'toc' );
	}

	# Everything else is matched and removed in one go
	$this->mDoubleUnderscores = $this->magicWordFactory
		->getDoubleUnderscoreArray()
		->matchAndRemove( $text );

	if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
		$this->mOutput->setNoGallery( true );
	}

	if ( !$this->mForceTocPosition && isset( $this->mDoubleUnderscores['notoc'] ) ) {
		$this->mShowToc = false;
	}

	if (
		isset( $this->mDoubleUnderscores['hiddencat'] )
		&& $this->getTitle()->getNamespace() === NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}

	# (T10068) Allow control over whether robots index a page.
	# 'index' is processed after 'noindex', so __INDEX__ always
	# overrides __NOINDEX__, see T16899
	$indexPolicies = [
		'noindex' => 'noindex-category',
		'index' => 'index-category',
	];
	foreach ( $indexPolicies as $policy => $trackingCategory ) {
		if ( isset( $this->mDoubleUnderscores[$policy] ) && $this->getTitle()->canUseNoindex() ) {
			$this->mOutput->setIndexPolicy( $policy );
			$this->addTrackingCategory( $trackingCategory );
		}
	}

	# Store every double underscore that was found as a page property
	foreach ( array_keys( $this->mDoubleUnderscores ) as $key ) {
		$this->mOutput->setUnsortedPageProperty( $key );
	}

	return $text;
}
4154
/**
 * Add a tracking category to the current parser output for the current page.
 *
 * @param string $msg Identifier handed to the tracking-categories service
 * @return mixed Whatever the tracking-categories service returns
 */
public function addTrackingCategory( $msg ) {
	$tracker = $this->trackingCategories;

	return $tracker->addTrackingCategory( $this->mOutput, $msg, $this->getPage() );
}
4166
/**
 * Build a Message in the parser's target language, bound to the page
 * being parsed.
 *
 * @param string $msg Message key
 * @param mixed ...$args Message parameters
 * @return Message
 */
public function msg( string $msg, ...$args ): Message {
	$message = wfMessage( $msg, ...$args );

	return $message
		->inLanguage( $this->getTargetLanguage() )
		->page( $this->getPage() );
}
4185
/**
 * Recursively reduce the DOM of a heading to the markup allowed in a
 * table-of-contents line. The container is modified in place.
 *
 * - <style> and <script> elements are removed together with their content.
 * - Allowed elements are kept but lose all attributes, except dir="rtl" and
 *   dir="ltr" on <span>. A <span> left without children is removed.
 * - Any other element is unwrapped: its children take its place and are
 *   then cleaned in turn.
 * - Comments are removed.
 *
 * @param Node $container Element or DocumentFragment whose children are cleaned
 */
private function cleanUpTocLine( Node $container ) {
	'@phan-var Element|DocumentFragment $container'; // @var Element|DocumentFragment $container
	# Strip out HTML
	# Allowed tags are:
	# * <sup> and <sub> (T10393)
	# * <i> (T28375)
	# * <b> (r105284)
	# * <bdi> (T74884)
	# * <span dir="rtl"> and <span dir="ltr"> (T37167)
	# * <s> and <strike> (T35715)
	# * <q> (T251672)
	# We strip any parameter from accepted tags, except dir="rtl|ltr" from <span>,
	# to allow setting directionality in toc items.
	$allowedTags = [ 'span', 'sup', 'sub', 'bdi', 'i', 'b', 's', 'strike', 'q' ];
	$node = $container->firstChild;
	while ( $node !== null ) {
		// Remember the successor now: $node may be detached below.
		$next = $node->nextSibling;
		if ( $node instanceof Element ) {
			$nodeName = DOMCompat::nodeName( $node );
			if ( in_array( $nodeName, [ 'style', 'script' ], true ) ) {
				# Remove any <style> or <script> tags (T198618)
				DOMCompat::remove( $node );
			} elseif ( in_array( $nodeName, $allowedTags, true ) ) {
				// Keep tag, remove attributes. They are collected first so
				// the attribute list is not modified while iterating it.
				$removeAttrs = [];
				foreach ( $node->attributes as $attr ) {
					if (
						$nodeName === 'span' && $attr->name === 'dir'
						&& ( $attr->value === 'rtl' || $attr->value === 'ltr' )
					) {
						// Keep <span dir="rtl"> and <span dir="ltr">
						continue;
					}
					$removeAttrs[] = $attr;
				}
				foreach ( $removeAttrs as $attr ) {
					$node->removeAttributeNode( $attr );
				}
				$this->cleanUpTocLine( $node );
				# Strip '<span></span>', which is the result from the above if
				# <span id="foo"></span> is used to produce an additional anchor
				# for a section.
				if ( $nodeName === 'span' && !$node->hasChildNodes() ) {
					DOMCompat::remove( $node );
				}
			} else {
				// Strip tag: hoist the children into the parent and continue
				// with the first of them so they get cleaned as well. If the
				// element has no children (e.g. <br>), continue with its
				// original next sibling; otherwise the loop would end here
				// and the remaining siblings would be left uncleaned.
				$next = $node->firstChild ?? $next;
				// phpcs:ignore Generic.CodeAnalysis.AssignmentInCondition.FoundInWhileCondition
				while ( $childNode = $node->firstChild ) {
					$node->parentNode->insertBefore( $childNode, $node );
				}
				DOMCompat::remove( $node );
			}
		} elseif ( $node instanceof Comment ) {
			// Extensions may add comments to headings;
			// these shouldn't appear in the ToC either.
			DOMCompat::remove( $node );
		}
		$node = $next;
	}
}
4248
/**
 * Post-process the <h1>..<h6> headings of rendered text: compute unique
 * anchors, build the table-of-contents data, attach edit-section
 * placeholders and re-assemble the text with the rebuilt headings.
 *
 * @param string $text Rendered text containing the heading tags
 * @param string $origText Original wikitext; preprocessed here to compute
 *  the offset of each section
 * @param bool $isMain When true (and the TOC is not suppressed by the parser
 *  options), TOC data and TOC flags are stored on the parser output
 * @return string The text with rebuilt headings and, where applicable, the
 *  TOC placeholder appended to the part before the first heading
 */
private function finalizeHeadings( $text, $origText, $isMain = true ) {
	# Inhibit editsection links if requested in the page
	if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
		$maybeShowEditLink = false;
	} else {
		$maybeShowEditLink = true; /* Actual presence will depend on post-cache transforms */
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# NOTE: white space in headings have been trimmed in handleHeadings. They shouldn't
	# be trimmed here since whitespace in HTML headings is significant.
	$matches = [];
	$numMatches = preg_match_all(
		'/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
		$text,
		$matches
	);

	# if there are fewer than 4 headlines in the article, do not show TOC
	# unless it's been explicitly enabled.
	$enoughToc = $this->mShowToc &&
		( ( $numMatches >= 4 ) || $this->mForceTocPosition );

	# Allow user to stipulate that a page should have a "new section"
	# link added via __NEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
		$this->mOutput->setNewSection( true );
	}

	# Allow user to remove the "new section"
	# link via __NONEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
		$this->mOutput->setHideNewSection( true );
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC above first header
	if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
		$this->mShowToc = true;
		$enoughToc = true;
	}

	# headline counter
	$headlineCount = 0;
	$haveTocEntries = false;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$head = [];
	$level = 0;
	$tocData = new TOCData();
	$markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
	$baseTitleText = $this->getTitle()->getPrefixedDBkey();
	# The original text is preprocessed with the output type temporarily
	# switched to OT_WIKI; it is restored after the loop.
	$oldType = $this->mOutputType;
	$this->setOutputType( self::OT_WIKI );
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $origText );
	$node = $root->getFirstChild();
	$cpOffset = 0;
	# Lower-cased anchors handed out so far, as array keys
	$refers = [];

	$headlines = $numMatches !== false ? $matches['header'] : [];

	$maxTocLevel = $this->svcOptions->get( MainConfigNames::MaxTocLevel );
	$domDocument = DOMUtils::parseHTML( '' );
	foreach ( $headlines as $headline ) {
		$isTemplate = false;
		$titleText = false;
		$sectionIndex = false;
		$markerMatches = [];
		if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
			$serial = (int)$markerMatches[1];
			[ $titleText, $sectionIndex ] = $this->mHeadings[$serial];
			$isTemplate = ( $titleText != $baseTitleText );
			$headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
		}

		$sectionMetadata = SectionMetadata::fromLegacy( [
			"fromtitle" => $titleText ?: null,
			"index" => $sectionIndex === false
				? '' : ( ( $isTemplate ? 'T-' : '' ) . $sectionIndex )
		] );
		$tocData->addSection( $sectionMetadata );

		$oldLevel = $level;
		$level = (int)$matches['level'][$headlineCount];
		$tocData->processHeading( $oldLevel, $level, $sectionMetadata );

		if ( $tocData->getCurrentTOCLevel() < $maxTocLevel ) {
			$haveTocEntries = true;
		}

		# The safe header is a version of the header text safe to use for links

		# Remove link placeholders by the link text.
		#     <!--LINK number-->
		# turns into
		#     link text with suffix
		# Do this before unstrip since link text can contain strip markers
		$safeHeadline = $this->replaceLinkHoldersText( $headline );

		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );

		// Run Tidy to convert wikitext entities to HTML entities (T355386),
		// conveniently also giving us a way to handle French spaces (T324763)
		$safeHeadline = $this->tidy->tidy( $safeHeadline, [ Sanitizer::class, 'armorFrenchSpaces' ] );

		// Parse the heading contents as HTML. This makes it easier to strip out some HTML tags,
		// and ensures that we generate balanced HTML at the end (T218330).
		$headlineDom = DOMUtils::parseHTMLToFragment( $domDocument, $safeHeadline );

		$this->cleanUpTocLine( $headlineDom );

		// Serialize back to HTML
		$tocline = trim( DOMUtils::getFragmentInnerHTML( $headlineDom ) );

		# For the anchor, strip out HTML-y stuff period
		$safeHeadline = trim( $headlineDom->textContent );
		# Save headline for section edit hint before it's normalized for the link
		$headlineHint = htmlspecialchars( $safeHeadline );

		$safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );
		$safeHeadline = self::normalizeSectionName( $safeHeadline );

		$fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
		$linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
		$safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
		if ( $fallbackHeadline === $safeHeadline ) {
			# No reason to have both (in fact, we can't)
			$fallbackHeadline = false;
		}

		# HTML IDs must be case-insensitively unique for IE compatibility (T12721).
		$arrayKey = strtolower( $safeHeadline );
		if ( $fallbackHeadline === false ) {
			$fallbackArrayKey = false;
		} else {
			$fallbackArrayKey = strtolower( $fallbackHeadline );
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $safeHeadline;
		$fallbackAnchor = $fallbackHeadline;
		if ( isset( $refers[$arrayKey] ) ) {
			for ( $i = 2; isset( $refers["{$arrayKey}_$i"] ); ++$i );
			$anchor .= "_$i";
			$linkAnchor .= "_$i";
			$refers["{$arrayKey}_$i"] = true;
		} else {
			$refers[$arrayKey] = true;
		}
		# Only track the fallback anchor when there is one. Recording the
		# boolean false as an array key would store it under key 0, which is
		# also the key of a heading whose anchor is "0"; such a heading would
		# then be treated as a duplicate and wrongly get the "_2" suffix.
		if ( $fallbackHeadline !== false ) {
			if ( isset( $refers[$fallbackArrayKey] ) ) {
				for ( $i = 2; isset( $refers["{$fallbackArrayKey}_$i"] ); ++$i );
				$fallbackAnchor .= "_$i";
				$refers["{$fallbackArrayKey}_$i"] = true;
			} else {
				$refers[$fallbackArrayKey] = true;
			}
		}

		# Add the section to the section tree
		# Find the DOM node for this header
		$noOffset = ( $isTemplate || $sectionIndex === false );
		while ( $node && !$noOffset ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					break;
				}
			}
			$cpOffset += mb_strlen(
				$this->mStripState->unstripBoth(
					$frame->expand( $node, PPFrame::RECOVER_ORIG )
				)
			);
			$node = $node->getNextSibling();
		}
		$sectionMetadata->line = $tocline;
		$sectionMetadata->codepointOffset = ( $noOffset ? null : $cpOffset );
		$sectionMetadata->anchor = $anchor;
		$sectionMetadata->linkAnchor = $linkAnchor;

		if ( $maybeShowEditLink && $sectionIndex !== false ) {
			// Output edit section links as markers with styles that can be customized by skins
			if ( $isTemplate ) {
				# Put a T flag in the section identifier, to indicate to extractSections()
				# that sections inside <includeonly> should be counted.
				$editsectionPage = $titleText;
				$editsectionSection = "T-$sectionIndex";
			} else {
				$editsectionPage = $this->getTitle()->getPrefixedText();
				$editsectionSection = $sectionIndex;
			}
			// Construct a pseudo-HTML tag as a placeholder for the section edit link. It is replaced in
			// MediaWiki\OutputTransform\Stages\HandleSectionLinks with the real link.
			//
			// Any HTML markup in the input has already been escaped,
			// so we don't have to worry about a user trying to input one of these markers directly.
			//
			// We put the page and section in attributes to stop the language converter from
			// converting them, but put the headline hint in tag content
			// because it is supposed to be able to convert that.
			$editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage, ENT_COMPAT );
			$editlink .= '" section="' . htmlspecialchars( $editsectionSection, ENT_COMPAT ) . '"';
			$editlink .= '>' . $headlineHint . '</mw:editsection>';
		} else {
			$editlink = '';
		}
		// Reconstruct the original <h#> tag with added attributes. It is replaced in
		// MediaWiki\OutputTransform\Stages\HandleSectionLinks to add anchors and stuff.
		//
		// data-mw-... attributes are forbidden in Sanitizer::isReservedDataAttribute(),
		// so we don't have to worry about a user trying to input one of these markers directly.
		//
		// We put the anchors in attributes to stop the language converter from converting them.
		$head[$headlineCount] = "<h$level" . Html::expandAttributes( [
			'data-mw-anchor' => $anchor,
			'data-mw-fallback-anchor' => $fallbackAnchor,
		] ) . $matches['attrib'][$headlineCount] . $headline . $editlink . "</h$level>";

		$headlineCount++;
	}

	$this->setOutputType( $oldType );

	# Never ever show TOC if no headers (or suppressed)
	$suppressToc = $this->mOptions->getSuppressTOC();
	if ( !$haveTocEntries ) {
		$enoughToc = false;
	}
	$addTOCPlaceholder = false;

	if ( $isMain && !$suppressToc ) {
		// We generally output the section information via the API
		// even if there isn't "enough" of a ToC to merit showing
		// it -- but the "suppress TOC" parser option is set when
		// any sections that might be found aren't "really there"
		// (ie, JavaScript content that might have spurious === or
		// <h2>: T307691) so we will *not* set section information
		// in that case.
		$this->mOutput->setTOCData( $tocData );

		// T294950: Record a suggestion that the TOC should be shown.
		// Skins are free to ignore this suggestion and implement their
		// own criteria for showing/suppressing TOC (T318186).
		if ( $enoughToc ) {
			$this->mOutput->setOutputFlag( ParserOutputFlags::SHOW_TOC );
			if ( !$this->mForceTocPosition ) {
				$addTOCPlaceholder = true;
			}
		}

		// If __NOTOC__ is used on the page (and not overridden by
		// __TOC__ or __FORCETOC__) set the NO_TOC flag to tell
		// the skin that although the section information is
		// valid, it should perhaps not be presented as a Table Of
		// Contents.
		if ( !$this->mShowToc ) {
			$this->mOutput->setOutputFlag( ParserOutputFlags::NO_TOC );
		}
	}

	# split up and insert constructed headlines
	$blocks = preg_split( '/<h[1-6]\b[^>]*>.*?<\/h[1-6]>/is', $text );
	$i = 0;

	// build an array of document sections
	$sections = [];
	foreach ( $blocks as $block ) {
		// $head is zero-based, sections aren't.
		if ( empty( $head[$i - 1] ) ) {
			$sections[$i] = $block;
		} else {
			$sections[$i] = $head[$i - 1] . $block;
		}

		$i++;
	}

	if ( $addTOCPlaceholder ) {
		// The placeholder goes at the end of the part before the first heading.
		// Top anchor now in skin
		// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset At least one element when enoughToc is true
		$sections[0] .= self::TOC_PLACEHOLDER . "\n";
	}

	return implode( '', $sections );
}
4556
/**
 * Localize the sections of a table of contents in place: run each heading
 * line through the language converter (if one is given) and format each
 * dot-separated component of the section number for the given language.
 *
 * @param TOCData|null $tocData Nothing is done when null
 * @param Language $lang Used to format the section numbers
 * @param ILanguageConverter|null $converter Used to convert the heading
 *  lines; skipped when null
 */
private static function localizeTOC(
	?TOCData $tocData, Language $lang, ?ILanguageConverter $converter
) {
	if ( $tocData === null ) {
		return;
	}

	foreach ( $tocData->getSections() as $section ) {
		if ( $converter ) {
			// T331316 concerns conversion resetting the language converter
			// state. NOTE(review): presumably the trailing `false` is what
			// avoids that reset here -- confirm against convertTo().
			$variant = $converter->getPreferredVariant();
			$section->line = $converter->convertTo( $section->line, $variant, false );
		}

		// Format each component of a number like "1.2.3" separately
		$localized = [];
		foreach ( explode( '.', $section->number ) as $component ) {
			$localized[] = $lang->formatNum( $component );
		}
		$section->number = implode( '.', $localized );
	}
}
4594
/**
 * Transform wikitext before it is saved: strip NUL characters, normalize
 * line endings, optionally run the pre-save pass (pstPass2), unstrip, and
 * trim trailing whitespace.
 *
 * @param string $text Wikitext to transform
 * @param PageReference $page
 * @param UserIdentity $user Set as the parser's user for the duration of the call
 * @param ParserOptions $options The pre-save pass only runs if
 *  getPreSaveTransform() is true
 * @param bool $clearState Whether to lock the parser and clear its state first
 * @return string The transformed wikitext
 */
public function preSaveTransform(
	$text,
	PageReference $page,
	UserIdentity $user,
	ParserOptions $options,
	$clearState = true
) {
	if ( $clearState ) {
		// Held in a variable so the lock lasts until this method returns.
		$magicScopeVariable = $this->lock();
	}
	$this->startParse( $page, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	// Strip U+0000 NULL (T159174), then normalize line endings (which
	// also trims trailing whitespace). The latter is kept for callers
	// that invoke PST directly, although TextContent subclasses should
	// already have done it.
	$text = TextContent::normalizeLineEndings( str_replace( "\000", '', $text ) );

	if ( $options->getPreSaveTransform() ) {
		$text = $this->pstPass2( $text, $user );
	}

	// The steps above can introduce trailing whitespace again.
	$text = rtrim( $this->mStripState->unstripBoth( $text ) );

	$this->hookRunner->onParserPreSaveTransformComplete( $this, $text );

	# Reset
	$this->setUser( null );

	return $text;
}
4642
/**
 * Pre-save transform pass: substitute variables, expand signatures
 * (~~~, ~~~~, ~~~~~) and expand the "pipe trick" link shorthands.
 *
 * @param string $text Wikitext
 * @param UserIdentity $user User whose signature replaces the tildes
 * @return string The transformed wikitext
 */
private function pstPass2( $text, UserIdentity $user ) {
	# Note: This is the timestamp saved as hardcoded wikitext to the database, we use
	# $this->contLang here in order to give everyone the same signature and use the default one
	# rather than the one selected in each user's preferences. (see also T14815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

	$d = $this->contLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call-

	# Signatures
	if ( strpos( $text, '~~~' ) !== false ) {
		$sigText = $this->getUserSig( $user );
		# strtr() tries the longest key first, so five tildes win over four
		# and four over three.
		$text = strtr( $text, [
			'~~~~~' => $d,
			'~~~~' => "$sigText $d",
			'~~~' => $sigText
		] );
		# The main two signature forms used above are time-sensitive
		$this->setOutputFlag( ParserOutputFlags::USER_SIGNATURE, 'User signature detected' );
	}

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	# The non-ASCII punctuation below is written with \u{...} escapes so it
	# cannot be silently folded to its ASCII look-alike. The patterns have
	# no /u modifier; the escapes expand to the UTF-8 bytes, which are
	# matched as a plain byte sequence.

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page（context）|]] (double-width brackets U+FF08 / U+FF09, added in r40257).
	// These must be the literal double-width characters: ASCII parentheses
	// here would form a capture group and turn [[Foo|]] into [[Foo|F]].
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\u{FF08}$tc+\u{FF09})\\|]]/";
	// [[ns:page (context), context|]] (using single, double-width (U+FF0C)
	// or Arabic (U+060C) comma)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |\u{FF0C}|\u{060C} )$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	$t = $this->getTitle()->getText();
	$m = [];
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		# The current title ends in " (context)": reuse that context
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		# The current title has a namespace-like prefix and/or ", context"
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	return $text;
}
4712
/**
 * Build the signature wikitext for a user.
 *
 * A custom ("fancy") signature is returned, cleaned, if it passes
 * validation. Otherwise the result is the content-language signature
 * message for temporary, registered or anonymous users, parameterised with
 * the escaped user name and nickname.
 *
 * @param UserIdentity $user
 * @param string|false|null $nickname Nickname to use; false means read the
 *  'nickname' user option
 * @param bool|null $fancySig Whether the nickname is raw signature wikitext;
 *  null means read the 'fancysig' user option
 * @return string
 */
public function getUserSig( UserIdentity $user, $nickname = false, $fancySig = null ) {
	$username = $user->getName();

	# Fall back to the user's options for anything not passed in
	if ( $nickname === false ) {
		$nickname = $this->userOptionsLookup->getOption( $user, 'nickname' );
	}
	if ( $fancySig === null ) {
		$fancySig = $this->userOptionsLookup->getBoolOption( $user, 'fancysig' );
	}

	$maxSigChars = $this->svcOptions->get( MainConfigNames::MaxSigChars );

	if ( $nickname === null || $nickname === '' ) {
		// Empty value results in the default signature (even when fancysig is enabled)
		$nickname = $username;
	} elseif ( mb_strlen( $nickname ) > $maxSigChars ) {
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has overlong signature." );
	} elseif ( $fancySig !== false ) {
		# Sig. might contain markup; validate this
		$isValid = ( $this->validateSig( $nickname ) !== false );

		# New validator, only consulted in 'disallow' mode
		$sigValidation = $this->svcOptions->get( MainConfigNames::SignatureValidation );
		if ( $isValid && $sigValidation === 'disallow' ) {
			$parserOpts = new ParserOptions(
				$this->mOptions->getUserIdentity(),
				$this->contLang
			);
			$validator = $this->signatureValidatorFactory
				->newSignatureValidator( $user, null, $parserOpts );
			$isValid = !$validator->validateSignature( $nickname );
		}

		if ( $isValid ) {
			# Validated; clean up (if needed) and return it
			return $this->cleanSig( $nickname, true );
		}

		# Failed to validate; fall back to the default
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has invalid signature." );
	}

	# Make sure nickname doesnt get a sig in a sig
	$nickname = self::cleanSigInSig( $nickname );

	# If we're still here, make it a link to the user page
	$userText = wfEscapeWikiText( $username );
	$nickText = wfEscapeWikiText( $nickname );

	if ( $this->userNameUtils->isTemp( $username ) ) {
		$msgName = 'signature-temp';
	} else {
		$msgName = $user->isRegistered() ? 'signature' : 'signature-anon';
	}

	return wfMessage( $msgName, $userText, $nickText )
		->inContentLanguage()
		->page( $this->getPage() )
		->text();
}
4787
/**
 * Check that a custom signature is a well-formed XML fragment.
 *
 * @param string $text
 * @return string|false The text unchanged if it is well-formed, false otherwise
 */
public function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}

	return false;
}
4798
/**
 * Clean up signature wikitext: prefix every template call that is not
 * already a substitution with the subst keyword, remove nested signature
 * tildes, and expand the result through the preprocessor.
 *
 * @param string $text Signature wikitext
 * @param bool $parsing True when called in the middle of a parse; when
 *  false the parser is locked and started here, and the result is unstripped
 * @return string The cleaned signature, or the input unchanged if the
 *  parser options disable signature cleaning
 */
public function cleanSig( $text, $parsing = false ) {
	if ( !$parsing ) {
		// Held in a variable so the lock lasts until this method returns.
		$magicScopeVariable = $this->lock();
		$options = ParserOptions::newFromUser( RequestContext::getMain()->getUser() );
		$this->startParse( $this->mTitle, $options, self::OT_PREPROCESS, true );
	}

	# Option to disable this feature
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	$substWord = $this->magicWordFactory->get( 'subst' );
	# Matches "{{" not already followed by the subst keyword
	$substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase();
	$substText = '{{' . $substWord->getSynonym( 0 );

	$text = self::cleanSigInSig( preg_replace( $substRegex, $substText, $text ) );

	$dom = $this->preprocessToDom( $text );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom );

	return $parsing ? $text : $this->mStripState->unstripBoth( $text );
}
4844
/**
 * Remove every run of three to five tildes from the text, so that a
 * signature cannot itself contain signature markup.
 *
 * @param string $text
 * @return string
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
4856
/**
 * Replace the first table-of-contents placeholder in the text with the
 * given TOC and remove all other placeholders.
 *
 * Everything happens in one pass with a callback. Removing the extra
 * placeholders keeps the operation idempotent when run more than once,
 * and the single pass means placeholders that appear inside $toc itself
 * are left untouched.
 *
 * @param string $text
 * @param string $toc Replacement for the first placeholder
 * @return string
 */
public static function replaceTableOfContentsMarker( $text, $toc ) {
	$firstPending = true;

	$callback = static function ( array $matches ) use ( &$firstPending, $toc ): string {
		if ( $firstPending ) {
			$firstPending = false;
			return $toc;
		}
		return '';
	};

	return preg_replace_callback( self::TOC_PLACEHOLDER_REGEX, $callback, $text );
}
4890
/**
 * Public entry point to startParse() that can also set the revision ID.
 *
 * @param ?PageReference $page
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @param bool $clearState Whether clearState() should be called
 * @param int|null $revId When not null, stored as the current revision ID
 *   after the parse has been started
 */
public function startExternalParse(
	?PageReference $page,
	ParserOptions $options,
	$outputType,
	$clearState = true,
	$revId = null
) {
	$this->startParse( $page, $options, $outputType, $clearState );

	if ( $revId === null ) {
		return;
	}
	$this->mRevisionId = $revId;
}
4910
/**
 * Set the page, options and output type for a parse, optionally clearing
 * the parser state afterwards.
 *
 * @param ?PageReference $page Passed to setPage()
 * @param ParserOptions $options Stored as $this->mOptions
 * @param int $outputType Passed to setOutputType()
 * @param bool $clearState Whether to call clearState()
 */
private function startParse(
	?PageReference $page,
	ParserOptions $options,
	$outputType,
	$clearState = true
) {
	$this->setPage( $page );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
4927
/**
 * Run preprocess() over message text.
 *
 * Calls are not re-entrant: a function-static flag marks a call in progress,
 * and any nested call made while it is set returns its input untouched.
 *
 * @param string $text Text to preprocess
 * @param ParserOptions $options
 * @param ?PageReference $page Page context; $this->mTitle is used when null
 * @return string The preprocessed text, or $text unchanged for a nested call
 */
public function transformMsg( $text, ParserOptions $options, ?PageReference $page = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	try {
		return $this->preprocess( $text, $page ?? $this->mTitle, $options );
	} finally {
		# Always release the guard. If preprocess() throws and the flag stayed
		# set, every later call in this process would be mistaken for a
		# recursive one and return its input unprocessed.
		$executing = false;
	}
}
4951
/**
 * Register a callback for an extension tag.
 *
 * The tag name is lower-cased before it is stored. The tag is also appended
 * to the strip list if it is not on it yet.
 *
 * @param string $tag Tag name; must not contain "<", ">", CR or LF
 * @param callable $callback Callback to store for the tag
 * @return callable|null The callback previously registered for this tag, or
 *   null if there was none
 * @throws InvalidArgumentException If the tag name contains a forbidden character
 */
public function setHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new InvalidArgumentException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}
	$oldVal = $this->mTagHooks[$tag] ?? null;
	$this->mTagHooks[$tag] = $callback;
	// Strict comparison: with loose comparison PHP compares numeric-looking
	// strings numerically (e.g. "1e1" == "10"), so a distinct tag name could
	// wrongly be treated as already present and never reach the strip list.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $oldVal;
}
4984
/**
 * Forget all registered tag hooks and empty the strip list.
 */
public function clearTagHooks() {
	$this->mStripList = [];
	$this->mTagHooks = [];
}
4993
/**
 * Register a parser function callback and index all of its synonyms.
 *
 * @param string $id Magic word ID of the function
 * @param callable $callback Callback to store for the function
 * @param int $flags Bit field of self::SFH_* flags. Unless SFH_NO_HASH is
 *   set, each synonym is registered with a leading "#". The flags are stored
 *   next to the callback.
 * @return callable|null The callback previously registered under $id, or null
 */
public function setFunctionHook( $id, callable $callback, $flags = 0 ) {
	$previous = $this->mFunctionHooks[$id][0] ?? null;
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Add to function cache
	$magicWord = $this->magicWordFactory->get( $id );
	$names = $magicWord->getSynonyms();
	// 0 = case-insensitive bucket, 1 = case-sensitive bucket
	$bucket = (int)$magicWord->isCaseSensitive();

	foreach ( $names as $name ) {
		# Case-insensitive synonyms are stored lower-cased
		if ( $bucket === 0 ) {
			$name = $this->contLang->lc( $name );
		}
		# Add leading hash
		if ( ( $flags & self::SFH_NO_HASH ) === 0 ) {
			$name = '#' . $name;
		}
		# Remove trailing colon
		if ( substr( $name, -1 ) === ':' ) {
			$name = substr( $name, 0, -1 );
		}
		$this->mFunctionSynonyms[$bucket][$name] = $id;
	}

	return $previous;
}
5064
/**
 * List the IDs of all registered parser functions.
 *
 * @return array Keys of the function hook registry
 */
public function getFunctionHooks() {
	$ids = array_keys( $this->mFunctionHooks );

	return $ids;
}
5074
/**
 * Public wrapper around replaceLinkHoldersPrivate().
 *
 * @param string &$text Text to process, passed on by reference
 */
public function replaceLinkHolders( &$text ) {
	$this->replaceLinkHoldersPrivate( $text );
}
5085
/**
 * Hand $text to the replace() method of the parser's link holder array.
 *
 * @param string &$text Text to process, passed on by reference
 */
private function replaceLinkHoldersPrivate( &$text ) {
	$this->mLinkHolders->replace( $text );
}
5095
/**
 * Run $text through the replaceText() method of the parser's link holder array.
 *
 * @param string $text
 * @return string Whatever replaceText() returns for $text
 */
private function replaceLinkHoldersText( $text ) {
	$replaced = $this->mLinkHolders->replaceText( $text );

	return $replaced;
}
5106
/**
 * Render the contents of an image gallery as HTML.
 *
 * Creates an ImageGalleryBase instance (for $params['mode'] when given,
 * falling back to the default mode when that mode's class is not found),
 * applies the gallery-level settings from $params, then reads $text one line
 * at a time. Each line has the form "Title|option|...|caption"; every line
 * with a valid title is added to the gallery.
 *
 * @param string $text Gallery body, one image per line
 * @param array $params Gallery attributes. The keys read directly here are
 *   'mode', 'showfilename', 'caption', 'perrow', 'widths' and 'heights'; the
 *   whole array is also given to Sanitizer::validateTagAttributes() and to
 *   the gallery's setAdditionalOptions().
 * @return string The gallery's toHTML() output, after it has been passed to
 *   the AfterParserFetchFileAndTitle hook runner
 */
5121 public function renderImageGallery( $text, array $params ) {
5122 $mode = false;
5123 if ( isset( $params['mode'] ) ) {
5124 $mode = $params['mode'];
5125 }
5126
5127 try {
5128 $ig = ImageGalleryBase::factory( $mode );
5129 } catch ( ImageGalleryClassNotFoundException $e ) {
5130 // If invalid type set, fallback to default.
5131 $ig = ImageGalleryBase::factory( false );
5132 }
5133
5134 $ig->setContextTitle( $this->getTitle() );
5135 $ig->setShowBytes( false );
5136 $ig->setShowDimensions( false );
5137 $ig->setShowFilename( false );
5138 $ig->setParser( $this );
5139 $ig->setHideBadImages();
5140 $ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'ul' ) );
5141
// Only the presence of the 'showfilename' key matters, not its value.
5142 if ( isset( $params['showfilename'] ) ) {
5143 $ig->setShowFilename( true );
5144 } else {
5145 $ig->setShowFilename( false );
5146 }
5147 if ( isset( $params['caption'] ) ) {
5148 // NOTE: We aren't passing a frame here or below. Frame info
5149 // is currently opaque to Parsoid, which acts on OT_PREPROCESS.
5150 // See T107332#4030581
5151 $caption = $this->recursiveTagParse( $params['caption'] );
5152 $ig->setCaptionHtml( $caption );
5153 }
5154 if ( isset( $params['perrow'] ) ) {
5155 $ig->setPerRow( $params['perrow'] );
5156 }
5157 if ( isset( $params['widths'] ) ) {
5158 $ig->setWidths( $params['widths'] );
5159 }
5160 if ( isset( $params['heights'] ) ) {
5161 $ig->setHeights( $params['heights'] );
5162 }
5163 $ig->setAdditionalOptions( $params );
5164
5165 $enableLegacyMediaDOM = $this->svcOptions->get( MainConfigNames::ParserEnableLegacyMediaDOM );
5166
5167 $lines = StringUtils::explode( "\n", $text );
5168 foreach ( $lines as $line ) {
5169 # match lines like these:
5170 # Image:someimage.jpg|This is some image
// $matches[1] is the title part before the first pipe; $matches[3], when
// set, is everything after that pipe.
5171 $matches = [];
5172 preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
5173 # Skip empty lines
5174 if ( count( $matches ) == 0 ) {
5175 continue;
5176 }
5177
// Percent-decode the title part when the line contains a '%' anywhere.
5178 if ( strpos( $matches[0], '%' ) !== false ) {
5179 $matches[1] = rawurldecode( $matches[1] );
5180 }
5181 $title = Title::newFromText( $matches[1], NS_FILE );
5182 if ( $title === null ) {
5183 # Bogus title. Ignore these so we don't bomb out later.
5184 continue;
5185 }
5186
5187 # We need to get what handler the file uses, to figure out parameters.
5188 # Note, a hook can override the file name, and chose an entirely different
5189 # file (which potentially could be of a different type and have different handler).
5190 $options = [];
5191 $descQuery = false;
5192 $this->hookRunner->onBeforeParserFetchFileAndTitle(
5193 // @phan-suppress-next-line PhanTypeMismatchArgument Type mismatch on pass-by-ref args
5194 $this, $title, $options, $descQuery
5195 );
5196 # Don't register it now, as TraditionalImageGallery does that later.
5197 $file = $this->fetchFileNoRegister( $title, $options );
5198 $handler = $file ? $file->getHandler() : false;
5199
// Maps magic word ID => parameter name. The two gallery-internal entries
// always exist; handler entries are only added when there is a handler, so
// the switch's default branch below is only reached with a handler present.
5200 $paramMap = [
5201 'img_alt' => 'gallery-internal-alt',
5202 'img_link' => 'gallery-internal-link',
5203 ];
5204 if ( $handler ) {
5205 $paramMap += $handler->getParamMap();
5206 // We don't want people to specify per-image widths.
5207 // Additionally the width parameter would need special casing anyhow.
5208 unset( $paramMap['img_width'] );
5209 }
5210
5211 $mwArray = $this->magicWordFactory->newArray( array_keys( $paramMap ) );
5212
5213 $label = '';
5214 $alt = null;
5215 $handlerOptions = [];
5216 $imageOptions = [];
5217 $hasAlt = false;
5218
5219 if ( isset( $matches[3] ) ) {
5220 // look for an |alt= definition while trying not to break existing
5221 // captions with multiple pipes (|) in it, until a more sensible grammar
5222 // is defined for images in galleries
5223
5224 // FIXME: Doing recursiveTagParse at this stage is a bit odd,
5225 // and different from makeImage.
5226 $matches[3] = $this->recursiveTagParse( $matches[3] );
5227 // Protect LanguageConverter markup
5228 $parameterMatches = StringUtils::delimiterExplode(
5229 '-{', '}-',
5230 '|',
5231 $matches[3],
5232 true /* nested */
5233 );
5234
5235 foreach ( $parameterMatches as $parameterMatch ) {
5236 [ $magicName, $match ] = $mwArray->matchVariableStartToEnd( trim( $parameterMatch ) );
5237 if ( !$magicName ) {
5238 // Last pipe wins.
5239 $label = $parameterMatch;
5240 continue;
5241 }
5242
5243 $paramName = $paramMap[$magicName];
5244 switch ( $paramName ) {
5245 case 'gallery-internal-alt':
5246 $hasAlt = true;
5247 $alt = $this->stripAltText( $match, false );
5248 break;
5249 case 'gallery-internal-link':
5250 $linkValue = $this->stripAltText( $match, false );
5251 if ( preg_match( '/^-{R\|(.*)}-$/', $linkValue ) ) {
5252 // Result of LanguageConverter::markNoConversion
5253 // invoked on an external link.
// Drop the 4-character "-{R|" prefix and the 2-character "}-" suffix.
5254 $linkValue = substr( $linkValue, 4, -2 );
5255 }
5256 [ $type, $target ] = $this->parseLinkParameter( $linkValue );
5257 if ( $type ) {
5258 if ( $type === 'no-link' ) {
5259 $target = true;
5260 }
5261 $imageOptions[$type] = $target;
5262 }
5263 break;
5264 default:
5265 // Must be a handler specific parameter.
5266 if ( $handler->validateParam( $paramName, $match ) ) {
5267 $handlerOptions[$paramName] = $match;
5268 } else {
5269 // Guess not, consider it as caption.
5270 $this->logger->debug(
5271 "$parameterMatch failed parameter validation" );
5272 $label = $parameterMatch;
5273 }
5274 }
5275 }
5276 }
5277
5278 // Match makeImage when !$hasVisibleCaption
5279 if ( !$hasAlt ) {
5280 if ( $label !== '' ) {
5281 $alt = $this->stripAltText( $label, false );
5282 } else {
5283 if ( $enableLegacyMediaDOM ) {
5284 $alt = $title->getText();
5285 }
5286 }
5287 }
5288 $imageOptions['title'] = $this->stripAltText( $label, false );
5289
5290 // Match makeImage which sets this unconditionally
5291 $handlerOptions['targetlang'] = $this->getTargetLanguage()->getCode();
5292
5293 $ig->add(
5294 $title, $label, $alt, '', $handlerOptions,
5295 ImageGalleryBase::LOADING_DEFAULT, $imageOptions
5296 );
5297 }
5298 $html = $ig->toHTML();
5299 $this->hookRunner->onAfterParserFetchFileAndTitle( $this, $ig, $html );
5300 return $html;
5301 }
5302
/**
 * Build the image parameter map for a media handler, cached per handler class.
 *
 * The result is cached on this object under the handler's class name ('' when
 * there is no handler). The handler-independent part of the map is built once
 * and kept in function-static variables.
 *
 * @param object|false $handler Media handler of the file, or a falsy value
 *   when there is none; when set, its getParamMap() supplies the handler
 *   parameters
 * @return array Two elements: [ parameter map, magic word array ]. The map is
 *   keyed by magic word ID (e.g. 'img_left'); each value is [ type, name ]
 *   where type is 'horizAlign', 'vertAlign', 'frame' or 'handler'. The magic
 *   word array is created by the magic word factory from the map's keys.
 */
5307 private function getImageParams( $handler ) {
5308 if ( $handler ) {
5309 $handlerClass = get_class( $handler );
5310 } else {
5311 $handlerClass = '';
5312 }
5313 if ( !isset( $this->mImageParams[$handlerClass] ) ) {
5314 # Initialise static lists
5315 static $internalParamNames = [
5316 'horizAlign' => [ 'left', 'right', 'center', 'none' ],
5317 'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
5318 'bottom', 'text-bottom' ],
5319 'frame' => [ 'thumbnail', 'framed', 'frameless', 'border',
5320 // These parameters take arguments, so to ensure literals
5321 // have precedence, keep them listed last (T372935):
5322 'manualthumb', 'upright', 'link', 'alt', 'class' ],
5323 ];
5324 static $internalParamMap;
5325 if ( !$internalParamMap ) {
5326 $internalParamMap = [];
5327 foreach ( $internalParamNames as $type => $names ) {
5328 foreach ( $names as $name ) {
5329 // For grep: img_left, img_right, img_center, img_none,
5330 // img_baseline, img_sub, img_super, img_top, img_text_top, img_middle,
5331 // img_bottom, img_text_bottom,
5332 // img_thumbnail, img_manualthumb, img_framed, img_frameless, img_upright,
5333 // img_border, img_link, img_alt, img_class
// Magic word IDs use underscores where the parameter name has hyphens.
5334 $magicName = str_replace( '-', '_', "img_$name" );
5335 $internalParamMap[$magicName] = [ $type, $name ];
5336 }
5337 }
5338 }
5339
5340 # Add handler params
5341 # Since img_width is one of these, it is important it is listed
5342 # *after* the literal parameter names above (T372935).
5343 $paramMap = $internalParamMap;
5344 if ( $handler ) {
5345 $handlerParamMap = $handler->getParamMap();
5346 foreach ( $handlerParamMap as $magic => $paramName ) {
5347 $paramMap[$magic] = [ 'handler', $paramName ];
5348 }
5349 } else {
5350 // Parse the size for non-existent files. See T273013
5351 $paramMap[ 'img_width' ] = [ 'handler', 'width' ];
5352 }
5353 $this->mImageParams[$handlerClass] = $paramMap;
5354 $this->mImageParamsMagicArray[$handlerClass] =
5355 $this->magicWordFactory->newArray( array_keys( $paramMap ) );
5356 }
5357 return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
5358 }
5359
/**
 * Parse image link options and build the image link through
 * Linker::makeImageLink().
 *
 * The option text is split on "|" (LanguageConverter "-{ ... }-" markup is
 * kept intact). Each part is matched against the image parameter magic words
 * from getImageParams(); a part that matches nothing, or fails validation,
 * becomes the caption, and the last such part wins.
 *
 * @param LinkTarget $link Target of the file link; may be replaced by the
 *   one returned from fetchFileAndTitle()
 * @param string $options Pipe-separated option text
 * @param object|false $holders Passed on to stripAltText(): an object with a
 *   replaceText() method, or a falsy value to use the parser's own link holders
 * @return string The value returned by Linker::makeImageLink(), after
 *   modifyImageHtml() has been applied when the file exists
 */
5369 public function makeImage( LinkTarget $link, $options, $holders = false ) {
5370 # Check if the options text is of the form "options|alt text"
5371 # Options are:
5372 # * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang
5373 # * left no resizing, just left align. label is used for alt= only
5374 # * right same, but right aligned
5375 # * none same, but not aligned
5376 # * ___px scale to ___ pixels width, no aligning. e.g. use in taxobox
5377 # * center center the image
5378 # * framed Keep original image size, no magnify-button.
5379 # * frameless like 'thumb' but without a frame. Keeps user preferences for width
5380 # * upright reduce width for upright images, rounded to full __0 px
5381 # * border draw a 1px border around the image
5382 # * alt Text for HTML alt attribute (defaults to empty)
5383 # * class Set a class for img node
5384 # * link Set the target of the image link. Can be external, interwiki, or local
5385 # vertical-align values (no % or length right now):
5386 # * baseline
5387 # * sub
5388 # * super
5389 # * top
5390 # * text-top
5391 # * middle
5392 # * bottom
5393 # * text-bottom
5394
5395 # Protect LanguageConverter markup when splitting into parts
5396 $parts = StringUtils::delimiterExplode(
5397 '-{', '}-', '|', $options, true /* allow nesting */
5398 );
5399
5400 # Give extensions a chance to select the file revision for us
// From here on $options is reused as the fetch-options array filled by the
// hook; the option text now lives in $parts.
5401 $options = [];
5402 $descQuery = false;
5403 $title = Title::castFromLinkTarget( $link ); // hook signature compat
5404 $this->hookRunner->onBeforeParserFetchFileAndTitle(
5405 // @phan-suppress-next-line PhanTypeMismatchArgument Type mismatch on pass-by-ref args
5406 $this, $title, $options, $descQuery
5407 );
5408 # Fetch and register the file (file title may be different via hooks)
5409 [ $file, $link ] = $this->fetchFileAndTitle( $link, $options );
5410
5411 # Get parameter map
5412 $handler = $file ? $file->getHandler() : false;
5413
5414 [ $paramMap, $mwArray ] = $this->getImageParams( $handler );
5415
5416 if ( !$file ) {
5417 $this->addTrackingCategory( 'broken-file-category' );
5418 }
5419
5420 # Process the input parameters
5421 $caption = '';
5422 $params = [ 'frame' => [], 'handler' => [],
5423 'horizAlign' => [], 'vertAlign' => [] ];
5424 $seenformat = false;
5425 foreach ( $parts as $part ) {
5426 [ $magicName, $value ] = $mwArray->matchVariableStartToEnd( trim( $part ) );
5427 $validated = false;
5428 if ( isset( $paramMap[$magicName] ) ) {
5429 [ $type, $paramName ] = $paramMap[$magicName];
5430
5431 # Special case; width and height come in one variable together
5432 if ( $type === 'handler' && $paramName === 'width' ) {
5433 // The 'px' suffix has already been localized by img_width
5434 $parsedWidthParam = $this->parseWidthParam( $value, true, true );
5435 // Parsoid applies data-(width|height) attributes to broken
5436 // media spans, for client use. See T273013
// Without a handler, any value greater than zero is accepted.
5437 $validateFunc = static function ( $name, $value ) use ( $handler ) {
5438 return $handler
5439 ? $handler->validateParam( $name, $value )
5440 : $value > 0;
5441 };
5442 if ( isset( $parsedWidthParam['width'] ) ) {
5443 $width = $parsedWidthParam['width'];
5444 if ( $validateFunc( 'width', $width ) ) {
5445 $params[$type]['width'] = $width;
5446 $validated = true;
5447 }
5448 }
5449 if ( isset( $parsedWidthParam['height'] ) ) {
5450 $height = $parsedWidthParam['height'];
5451 if ( $validateFunc( 'height', $height ) ) {
5452 $params[$type]['height'] = $height;
5453 $validated = true;
5454 }
5455 }
5456 # else no validation -- T15436
5457 } else {
5458 if ( $type === 'handler' ) {
5459 # Validate handler parameter
5460 $validated = $handler->validateParam( $paramName, $value );
5461 } else {
5462 # Validate internal parameters
5463 switch ( $paramName ) {
5464 case 'alt':
5465 case 'class':
5466 $validated = true;
5467 $value = $this->stripAltText( $value, $holders );
5468 break;
5469 case 'link':
// parseLinkParameter() replaces $paramName with the link type
// ('no-link', 'link-url' or 'link-title'), or null when invalid.
5470 [ $paramName, $value ] =
5471 $this->parseLinkParameter(
5472 $this->stripAltText( $value, $holders )
5473 );
5474 if ( $paramName ) {
5475 $validated = true;
5476 if ( $paramName === 'no-link' ) {
5477 $value = true;
5478 }
5479 }
5480 break;
5481 case 'manualthumb':
5482 # @todo FIXME: Possibly check validity here for
5483 # manualthumb? downstream behavior seems odd with
5484 # missing manual thumbs.
5485 $value = $this->stripAltText( $value, $holders );
5486 // fall through
5487 case 'frameless':
5488 case 'framed':
5489 case 'thumbnail':
5490 // use first appearing option, discard others.
5491 $validated = !$seenformat;
5492 $seenformat = true;
5493 break;
5494 default:
5495 # Most other things appear to be empty or numeric...
5496 $validated = ( $value === false || is_numeric( trim( $value ) ) );
5497 }
5498 }
5499
5500 if ( $validated ) {
5501 $params[$type][$paramName] = $value;
5502 }
5503 }
5504 }
// Anything that was not accepted as a parameter is treated as the caption;
// a later such part overwrites an earlier one.
5505 if ( !$validated ) {
5506 $caption = $part;
5507 }
5508 }
5509
5510 # Process alignment parameters
// When several alignment options were given, the first key recorded is used.
5511 if ( $params['horizAlign'] !== [] ) {
5512 $params['frame']['align'] = array_key_first( $params['horizAlign'] );
5513 }
5514 if ( $params['vertAlign'] !== [] ) {
5515 $params['frame']['valign'] = array_key_first( $params['vertAlign'] );
5516 }
5517
5518 $params['frame']['caption'] = $caption;
5519
5520 $enableLegacyMediaDOM = $this->svcOptions->get( MainConfigNames::ParserEnableLegacyMediaDOM );
5521
5522 # Will the image be presented in a frame, with the caption below?
5523 // @phan-suppress-next-line PhanImpossibleCondition
5524 $hasVisibleCaption = isset( $params['frame']['framed'] )
5525 // @phan-suppress-next-line PhanImpossibleCondition
5526 || isset( $params['frame']['thumbnail'] )
5527 // @phan-suppress-next-line PhanImpossibleCondition
5528 || isset( $params['frame']['manualthumb'] );
5529
5530 # In the old days, [[Image:Foo|text...]] would set alt text. Later it
5531 # came to also set the caption, ordinary text after the image -- which
5532 # makes no sense, because that just repeats the text multiple times in
5533 # screen readers. It *also* came to set the title attribute.
5534 # Now that we have an alt attribute, we should not set the alt text to
5535 # equal the caption: that's worse than useless, it just repeats the
5536 # text. This is the framed/thumbnail case. If there's no caption, we
5537 # use the unnamed parameter for alt text as well, just for the time be-
5538 # ing, if the unnamed param is set and the alt param is not.
5539 # For the future, we need to figure out if we want to tweak this more,
5540 # e.g., introducing a title= parameter for the title; ignoring the un-
5541 # named parameter entirely for images without a caption; adding an ex-
5542 # plicit caption= parameter and preserving the old magic unnamed para-
5543 # meter for BC; ...
5544 if ( $hasVisibleCaption ) {
5545 if (
5546 // @phan-suppress-next-line PhanImpossibleCondition
5547 $caption === '' && !isset( $params['frame']['alt'] ) &&
5548 $enableLegacyMediaDOM
5549 ) {
5550 # No caption or alt text, add the filename as the alt text so
5551 # that screen readers at least get some description of the image
5552 $params['frame']['alt'] = $link->getText();
5553 }
5554 # Do not set $params['frame']['title'] because tooltips are unnecessary
5555 # for framed images, the caption is visible
5556 } else {
5557 // @phan-suppress-next-line PhanImpossibleCondition
5558 if ( !isset( $params['frame']['alt'] ) ) {
5559 # No alt text, use the "caption" for the alt text
5560 if ( $caption !== '' ) {
5561 $params['frame']['alt'] = $this->stripAltText( $caption, $holders );
5562 } elseif ( $enableLegacyMediaDOM ) {
5563 # No caption, fall back to using the filename for the
5564 # alt text
5565 $params['frame']['alt'] = $link->getText();
5566 }
5567 }
5568 # Use the "caption" for the tooltip text
5569 $params['frame']['title'] = $this->stripAltText( $caption, $holders );
5570 }
5571 $params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();
5572
5573 // hook signature compat again, $link may have changed
5574 $title = Title::castFromLinkTarget( $link );
5575 $this->hookRunner->onParserMakeImageParams( $title, $file, $params, $this );
5576
5577 # Linker does the rest
5578 $time = $options['time'] ?? false;
5579 // @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset
5580 $ret = Linker::makeImageLink( $this, $link, $file, $params['frame'], $params['handler'],
5581 $time, $descQuery, $this->mOptions->getThumbSize() );
5582
5583 # Give the handler a chance to modify the parser object
5584 if ( $handler ) {
5585 $handler->parserTransformHook( $this, $file );
5586 }
5587 if ( $file ) {
5588 $this->modifyImageHtml( $file, $params, $ret );
5589 }
5590
5591 return $ret;
5592 }
5593
/**
 * Interpret the value of an image "link" option.
 *
 * External URLs and page titles that are accepted are also recorded on the
 * parser output (addExternalLink() / addLink()).
 *
 * @param string $value Option value with link placeholders already resolved
 * @return array Two elements, [ type, target ]:
 *   - [ 'no-link', false ] for an empty value
 *   - [ 'link-url', string ] for a valid external URL
 *   - [ 'link-title', Title ] for a valid page title
 *   - [ null, false ] when the value is neither
 */
private function parseLinkParameter( $value ) {
	$chars = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	$prots = $this->urlUtils->validProtocols();

	if ( $value === '' ) {
		return [ 'no-link', false ];
	}

	// Anything starting with a known protocol is only ever treated as a
	// URL: if the rest is not a valid address, it is rejected outright
	// rather than being retried as a page title.
	if ( preg_match( "/^((?i)$prots)/", $value ) ) {
		if ( !preg_match( "/^((?i)$prots)$addr$chars*$/u", $value ) ) {
			return [ null, false ];
		}
		$this->mOutput->addExternalLink( $value );

		return [ 'link-url', $value ];
	}

	// Percent-decode link arguments for consistency with wikilink handling
	// (T216003#7836261).
	//
	// Both link=Test%22test and link=[[Test%22test]] reach this point. In
	// the bracketed form the wikilink has already been parsed (and so
	// decoded) before stripAltText() resolved its placeholder, which
	// raises the question of double decoding. The only troublesome input
	// would be %25, which decodes to a bare % that could combine with the
	// characters after it. But % is not a valid title character, so such
	// text never parses as a link in the first place and arrives here
	// still encoded. Double decoding is therefore not an issue; see the
	// test "Should not double decode the link option".
	if ( strpos( $value, '%' ) !== false ) {
		$value = rawurldecode( $value );
	}

	$linkTitle = Title::newFromText( $value );
	if ( !$linkTitle ) {
		return [ null, false ];
	}
	$this->mOutput->addLink( $linkTitle );

	return [ 'link-title', $linkTitle ];
}
5664
/**
 * Pass generated image HTML to the ParserModifyImageHTML hook runner.
 *
 * @param File $file File the HTML was generated for
 * @param array $params Image parameters, handed to the hook as-is
 * @param string &$html Image HTML, handed to the hook
 */
public function modifyImageHtml( File $file, array $params, string &$html ) {
	$this->hookRunner->onParserModifyImageHTML(
		$this,
		$file,
		$params,
		$html
	);
}
5675
/**
 * Turn caption text into tag-free text, as used for alt and title values.
 *
 * Steps, in order: resolve link placeholders, unstrip strip markers, escape
 * the "&" of legacy named entities that are not terminated by a semicolon,
 * then remove all tags with Sanitizer::stripAllTags().
 *
 * @param string $caption Text to clean
 * @param object|false $holders Object whose replaceText() resolves link
 *   placeholders; when falsy, the parser's own link holders are used
 * @return string
 */
5681 private function stripAltText( $caption, $holders ) {
5682 # Strip bad stuff out of the title (tooltip). We can't just use
5683 # replaceLinkHoldersText() here, because if this function is called
5684 # from handleInternalLinks2(), mLinkHolders won't be up-to-date.
5685 if ( $holders ) {
5686 $tooltip = $holders->replaceText( $caption );
5687 } else {
5688 $tooltip = $this->replaceLinkHoldersText( $caption );
5689 }
5690
5691 # make sure there are no placeholders in thumbnail attributes
5692 # that are later expanded to html- so expand them now and
5693 # remove the tags
5694 $tooltip = $this->mStripState->unstripBoth( $tooltip );
5695 # Compatibility hack! In HTML certain entity references not terminated
5696 # by a semicolon are decoded (but not if we're in an attribute; that's
5697 # how link URLs get away without properly escaping & in queries).
5698 # But wikitext has always required semicolon-termination of entities,
5699 # so encode & where needed to avoid decode of semicolon-less entities.
5700 # See T209236 and
5701 # https://www.w3.org/TR/html5/syntax.html#named-character-references
5702 # T210437 discusses moving this workaround to Sanitizer::stripAllTags.
// The pattern uses the x (extended) modifier, so whitespace and the "#"
// remarks inside the literal are part of the regex source, not PHP comments.
// Only the "&" itself is replaced; the entity name is matched by lookahead.
5703 $tooltip = preg_replace( "/
5704 & # 1. entity prefix
5705 (?= # 2. followed by:
5706 (?: # a. one of the legacy semicolon-less named entities
5707 A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
5708 C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
5709 GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
5710 O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
5711 U(?:acute|circ|grave|uml)|Yacute|
5712 a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
5713 c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
5714 divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
5715 frac(?:1(?:2|4)|34)|
5716 gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
5717 i(?:acute|circ|excl|grave|quest|uml)|laquo|
5718 lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
5719 m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
5720 not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
5721 o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
5722 p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
5723 s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
5724 u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
5725 )
5726 (?:[^;]|$)) # b. and not followed by a semicolon
5727 # S = study, for efficiency
5728 /Sx", '&amp;', $tooltip );
5729 $tooltip = Sanitizer::stripAllTags( $tooltip );
5730
5731 return $tooltip;
5732 }
5733
/**
 * Expand variables in the text and then unstrip it.
 *
 * The result is written back through the reference as well as returned.
 *
 * @deprecated since 1.35; calling this emits a deprecation notice
 * @param string &$text Text to process; updated in place
 * @param PPFrame|false $frame Frame handed to replaceVariables()
 * @return string The processed text
 */
public function attributeStripCallback( &$text, $frame = false ) {
	wfDeprecated( __METHOD__, '1.35' );

	// Two separate assignments on purpose: the caller's variable is
	// updated after each step.
	$text = $this->replaceVariables( $text, $frame );
	$text = $this->mStripState->unstripBoth( $text );

	return $text;
}
5749
/**
 * List the names of all tags that have a hook registered.
 *
 * @return array Keys of the tag hook registry
 */
public function getTags(): array {
	$tagNames = array_keys( $this->mTagHooks );

	return $tagNames;
}
5759
/**
 * Get the parser function synonym table filled by setFunctionHook().
 *
 * @return array Synonym => function ID maps, in two buckets: index 0 holds
 *   case-insensitive synonyms, index 1 case-sensitive ones
 */
public function getFunctionSynonyms() {
	$synonyms = $this->mFunctionSynonyms;

	return $synonyms;
}
5767
/**
 * Get the valid URL protocols as reported by the URL utilities service.
 *
 * @return string Whatever validProtocols() returns; elsewhere in this class
 *   that value is interpolated into regular expressions
 */
public function getUrlProtocols() {
	$protocols = $this->urlUtils->validProtocols();

	return $protocols;
}
5775
/**
 * Shared implementation behind getSection() and replaceSection(): find one
 * section of a wikitext document and either return it or replace it.
 *
 * Takes the parser lock and starts a fresh OT_PLAIN parse. A section runs
 * from its heading up to the next heading of the same or a higher level, so
 * nested subsections are part of it.
 *
 * @param string $text Wikitext of the whole document
 * @param string|int $sectionId Section index, optionally preceded by
 *   dash-separated flags (e.g. "T-2"). The only flag acted on is "T", which
 *   preprocesses for inclusion. A non-numeric ID such as 'new' is treated as 0.
 * @param string $mode 'get' to return the section, 'replace' to return the
 *   document with the section replaced
 * @param string $newText In 'replace' mode the replacement text; in 'get'
 *   mode the text returned when the section is not found
 * @param ?PageReference $page Page handed to startParse()
 * @return string In 'get' mode the section text (or $newText when it is not
 *   found); in 'replace' mode the new document text (or $text unchanged when
 *   the section is not found). Results built from the DOM are right-trimmed.
 */
5806 private function extractSections( $text, $sectionId, $mode, $newText, ?PageReference $page = null ) {
5807 $magicScopeVariable = $this->lock();
5808 $this->startParse(
5809 $page,
5810 ParserOptions::newFromUser( RequestContext::getMain()->getUser() ),
5811 self::OT_PLAIN,
5812 true
5813 );
5814 $outText = '';
5815 $frame = $this->getPreprocessor()->newFrame();
5816
5817 # Process section extraction flags
5818 $flags = 0;
5819 $sectionParts = explode( '-', $sectionId );
5820 // The section ID may either be a magic string such as 'new' (which should be treated as 0),
5821 // or a numbered section ID in the format of "T-<section index>".
5822 // Explicitly coerce the section index into a number accordingly. (T323373)
5823 $sectionIndex = (int)array_pop( $sectionParts );
5824 foreach ( $sectionParts as $part ) {
5825 if ( $part === 'T' ) {
5826 $flags |= Preprocessor::DOM_FOR_INCLUSION;
5827 }
5828 }
5829
5830 # Check for empty input
5831 if ( strval( $text ) === '' ) {
5832 # Only sections 0 and T-0 exist in an empty document
5833 if ( $sectionIndex === 0 ) {
5834 if ( $mode === 'get' ) {
5835 return '';
5836 }
5837
5838 return $newText;
5839 } else {
5840 if ( $mode === 'get' ) {
5841 return $newText;
5842 }
5843
5844 return $text;
5845 }
5846 }
5847
5848 # Preprocess the text
5849 $root = $this->preprocessToDom( $text, $flags );
5850
5851 # <h> nodes indicate section breaks
5852 # They can only occur at the top level, so we can find them by iterating the root's children
5853 $node = $root->getFirstChild();
5854
5855 # Find the target section
5856 if ( $sectionIndex === 0 ) {
5857 # Section zero doesn't nest, level=big
5858 $targetLevel = 1000;
5859 } else {
// Walk forward to the heading whose index matches. In replace mode,
// everything before it is copied to the output unchanged.
5860 while ( $node ) {
5861 if ( $node->getName() === 'h' ) {
5862 $bits = $node->splitHeading();
5863 if ( $bits['i'] == $sectionIndex ) {
5864 $targetLevel = $bits['level'];
5865 break;
5866 }
5867 }
5868 if ( $mode === 'replace' ) {
5869 $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
5870 }
5871 $node = $node->getNextSibling();
5872 }
5873 }
5874
5875 if ( !$node ) {
5876 # Not found
5877 if ( $mode === 'get' ) {
5878 return $newText;
5879 } else {
5880 return $text;
5881 }
5882 }
5883
5884 # Find the end of the section, including nested sections
// Stops at the first other heading whose level number is less than or equal
// to the target's. In get mode the nodes passed on the way are collected.
5885 do {
5886 if ( $node->getName() === 'h' ) {
5887 $bits = $node->splitHeading();
5888 $curLevel = $bits['level'];
5889 // @phan-suppress-next-line PhanPossiblyUndeclaredVariable False positive
5890 if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
5891 break;
5892 }
5893 }
5894 if ( $mode === 'get' ) {
5895 $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
5896 }
5897 $node = $node->getNextSibling();
5898 } while ( $node );
5899
5900 # Write out the remainder (in replace mode only)
5901 if ( $mode === 'replace' ) {
5902 # Output the replacement text
5903 # Add two newlines on -- trailing whitespace in $newText is conventionally
5904 # stripped by the editor, so we need both newlines to restore the paragraph gap
5905 # Only add trailing whitespace if there is newText
5906 if ( $newText != "" ) {
5907 $outText .= $newText . "\n\n";
5908 }
5909
5910 while ( $node ) {
5911 $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
5912 $node = $node->getNextSibling();
5913 }
5914 }
5915
5916 # Re-insert stripped tags
5917 $outText = rtrim( $this->mStripState->unstripBoth( $outText ) );
5918
5919 return $outText;
5920 }
5921
/**
 * Get the text of one section of a wikitext document.
 *
 * Delegates to extractSections() in 'get' mode.
 *
 * @param string $text Wikitext of the whole document
 * @param string|int $sectionId Section identifier, e.g. "2" or "T-2"
 * @param string $defaultText Text to return when the section is not found
 * @return string
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	$mode = 'get';

	return $this->extractSections( $text, $sectionId, $mode, $defaultText );
}
5940
/**
 * Replace one section of a wikitext document and return the new document.
 *
 * Delegates to extractSections() in 'replace' mode.
 *
 * @param string $oldText Wikitext of the whole document
 * @param string|int $sectionId Section identifier, e.g. "2" or "T-2"
 * @param string $newText Text to put in place of the section
 * @return string
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$mode = 'replace';

	return $this->extractSections( $oldText, $sectionId, $mode, $newText );
}
5957
/**
 * Split wikitext into a flat list of sections, one entry per heading plus
 * one for the text before the first heading.
 *
 * Takes the parser lock and starts a fresh OT_PLAIN parse with no page.
 *
 * @param string $text Wikitext to split
 * @return array[] List of sections in document order. Each entry has the keys:
 *   - 'index': section index from the heading (0 for the leading entry)
 *   - 'level': heading level (0 for the leading entry)
 *   - 'offset': byte offset at which the section starts, measured over the
 *     recovered original text of the preceding nodes
 *   - 'heading': recovered text of the heading node ('' for the leading entry)
 *   - 'text': recovered text of the whole section, heading included
 *   The leading entry is always present, even when it is empty.
 */
public function getFlatSectionInfo( $text ) {
	// Held in a local so the lock lives until this method returns.
	$scopedLock = $this->lock();
	$userOptions = ParserOptions::newFromUser( RequestContext::getMain()->getUser() );
	$this->startParse( null, $userOptions, self::OT_PLAIN, true );

	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $text, 0 );

	$result = [];
	$position = 0;
	$section = [
		'index' => 0,
		'level' => 0,
		'offset' => 0,
		'heading' => '',
		'text' => ''
	];

	for ( $node = $root->getFirstChild(); $node; $node = $node->getNextSibling() ) {
		$original = $frame->expand( $node, PPFrame::RECOVER_ORIG );

		if ( $node->getName() !== 'h' ) {
			// Ordinary content belongs to the section being collected.
			$section['text'] .= $original;
		} else {
			// A heading closes the current section and opens the next one.
			$bits = $node->splitHeading();
			$result[] = $section;
			$section = [
				'index' => $bits['i'],
				'level' => $bits['level'],
				'offset' => $position,
				'heading' => $original,
				'text' => $original
			];
		}

		$position += strlen( $original );
	}

	$result[] = $section;

	return $result;
}
6029
/**
 * Get the revision ID stored on the parser for the current parse.
 *
 * @return int|null Null when no revision ID has been set
 */
public function getRevisionId() {
	$revId = $this->mRevisionId;

	return $revId;
}
6044
/**
 * Get the revision record for the revision being parsed, if there is one.
 *
 * A record that was found is cached on the parser; a null result is not.
 *
 * @return RevisionRecord|null Null when the options callback reports no
 *   revision, when a saved revision comes back while no revision ID is set,
 *   or when the lookup by revision ID finds nothing
 */
public function getRevisionRecordObject() {
	if ( $this->mRevisionRecordObject ) {
		return $this->mRevisionRecordObject;
	}

	// The callback is consulted even when mRevisionId is null, so that a
	// revision which has not been saved yet can be supplied. It may also
	// inject an OLD revision that was already loaded, so "current" is a
	// bit of a misnomer and the call cannot be skipped just because
	// mRevisionId is set.
	$rev = call_user_func(
		$this->mOptions->getCurrentRevisionRecordCallback(),
		$this->getTitle(),
		$this
	);

	// Two reasons to give up with null:
	//  - The callback signals a missing revision with `false` (see for
	//    example Parser::statelessFetchRevisionRecord(), the default
	//    callback), whereas this API reports it as `null`. (T251952)
	//  - We are in preview mode (mRevisionId is null) and the callback
	//    returned an existing, saved revision. That is probably the page's
	//    current revision, which is not what we want here. The callback
	//    still had to be called so that an unsaved revision could be
	//    injected, e.g. for self-transclusion previews.
	if ( !$rev || ( $this->mRevisionId === null && $rev->getId() ) ) {
		return null;
	}

	// When parsing for a new revision, the callback should already have
	// been set up to return the object matching mRevisionId. If it did
	// not, fetch by mRevisionId instead.
	$mismatch = $this->mRevisionId && $rev->getId() != $this->mRevisionId;
	if ( $mismatch ) {
		$rev = MediaWikiServices::getInstance()
			->getRevisionLookup()
			->getRevisionById( $this->mRevisionId );
	}

	return $this->mRevisionRecordObject = $rev;
}
6099
/**
 * Get the timestamp associated with the current revision, adjusted to the
 * site-default timezone offset.
 *
 * The unadjusted timestamp is recorded on the ParserOutput via
 * setRevisionTimestampUsed(); the adjusted value is cached in
 * $this->mRevisionTimestamp.
 *
 * @return string The adjusted timestamp
 */
public function getRevisionTimestamp() {
	if ( $this->mRevisionTimestamp !== null ) {
		return $this->mRevisionTimestamp;
	}

	# Prefer the timestamp of the revision being parsed; fall back to the
	# timestamp provided by the parser options
	$record = $this->getRevisionRecordObject();
	if ( $record && $record->getTimestamp() ) {
		$rawTimestamp = $record->getTimestamp();
	} else {
		$rawTimestamp = $this->mOptions->getTimestamp();
	}
	// Report the value before any timezone adjustment
	$this->mOutput->setRevisionTimestampUsed( $rawTimestamp );

	# Passing '' as the timezone selects the site-default offset rather
	# than the user's setting. The value ends up in the parser cache, is
	# served to other users and may even be used inside links, so it has
	# to be the same for every visitor.
	$this->mRevisionTimestamp = $this->contLang->userAdjust( $rawTimestamp, '' );

	return $this->mRevisionTimestamp;
}
6127
/**
 * Get the name of the user that edited the last revision.
 *
 * @return string|null The user name; null when there is no revision user
 *   and the parse is neither a pre-save transform nor a preview
 */
public function getRevisionUser(): ?string {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$record = $this->getRevisionRecordObject();

	# When a template is being subst:ed the revision id is blank,
	# so the current user's name is used instead
	if ( $record && $record->getUser() ) {
		$this->mRevisionUser = $record->getUser()->getName();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		$this->mRevisionUser = $this->getUserIdentity()->getName();
	}

	# In every other case $this->mRevisionUser is deliberately left null
	return $this->mRevisionUser;
}
6151
/**
 * Get the size of the revision.
 *
 * @return int|null The size reported by the revision record, or the parser
 *   input size when no revision record is available
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize !== null ) {
		return $this->mRevisionSize;
	}

	# When this variable is subst:ed the revision id is blank; the parser
	# input size is used then, because the substitution itself changes
	# the size.
	$record = $this->getRevisionRecordObject();
	$this->mRevisionSize = $record ? $record->getSize() : $this->mInputSize;

	return $this->mRevisionSize;
}
6173
/**
 * Accessor for the 'defaultsort' page property.
 *
 * @deprecated since 1.38; calling this emits a deprecation warning
 * @return string The property value, or '' when the property is not set
 */
public function getDefaultSort() {
	wfDeprecated( __METHOD__, '1.38' );
	$sortKey = $this->mOutput->getPageProperty( 'defaultsort' );
	return $sortKey ?? '';
}
6191
/**
 * Turn already-stripped heading text into a normalized section name.
 *
 * Applies, in order: section-name whitespace normalization, character
 * reference decoding, and title-fragment normalization.
 *
 * @param string $text
 * @return string
 */
private static function getSectionNameFromStrippedText( $text ) {
	return self::normalizeSectionName(
		Sanitizer::decodeCharReferences(
			Sanitizer::normalizeSectionNameWhitespace( $text )
		)
	);
}
6198
/**
 * Build a link anchor ("#" followed by the escaped ID) for a section name.
 *
 * @param string $sectionName
 * @return string
 */
private static function makeAnchor( $sectionName ) {
	$escapedId = Sanitizer::escapeIdForLink( $sectionName );
	return '#' . $escapedId;
}
6202
/**
 * Build a link anchor for a section name, using the legacy encoding when
 * the second entry of the FragmentMode setting is 'legacy'.
 *
 * @param string $sectionName
 * @return string "#" followed by the escaped ID
 */
private function makeLegacyAnchor( $sectionName ) {
	$fragmentMode = $this->svcOptions->get( MainConfigNames::FragmentMode );
	$hasLegacyFallback = ( $fragmentMode[1] ?? null ) === 'legacy';

	// ForAttribute() and ForLink() are the same for legacy encoding
	$id = $hasLegacyFallback
		? Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK )
		: Sanitizer::escapeIdForLink( $sectionName );

	return "#$id";
}
6214
/**
 * Try to guess the section anchor name based on a wikitext fragment,
 * presumably extracted from a heading.
 *
 * @param string $text Wikitext fragment
 * @return string Anchor, including the leading "#"
 */
public function guessSectionNameFromWikiText( $text ) {
	# Wikitext links would break the anchor, so strip the markup first
	$stripped = $this->stripSectionName( $text );
	return self::makeAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6230
/**
 * Same as guessSectionNameFromWikiText(), but produces legacy anchors
 * instead, if possible.
 *
 * @param string $text Wikitext fragment
 * @return string Anchor, including the leading "#"
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Wikitext links would break the anchor, so strip the markup first
	$stripped = $this->stripSectionName( $text );
	return $this->makeLegacyAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6247
/**
 * Like guessSectionNameFromWikiText(), but takes already-stripped text
 * as input.
 *
 * @param string $text Section name, with wikitext markup already removed
 * @return string Anchor, including the leading "#"
 */
public static function guessSectionNameFromStrippedText( $text ) {
	return self::makeAnchor( self::getSectionNameFromStrippedText( $text ) );
}
6258
/**
 * Normalize a section name the same way a title fragment is normalized.
 *
 * @param string $text Section name
 * @return string The fragment as parsed by the title parser, or the input
 *   unchanged if "#$text" is not a valid title string
 */
private static function normalizeSectionName( $text ) {
	# T90902: IDs must go through the same normalization as links do
	$titleParser = MediaWikiServices::getInstance()->getTitleParser();
	'@phan-var MediaWikiTitleCodec $titleParser';
	try {
		return $titleParser->splitTitleString( "#$text" )['fragment'];
	} catch ( MalformedTitleException $ex ) {
		return $text;
	}
}
6278
/**
 * Strips a text string of wikitext for use in a section anchor.
 *
 * Removes, in order: internal link markup, external link markup with link
 * text, quote markup (via doQuotes()), and anything between "<" and ">".
 *
 * @param string $text Text string to be stripped of wikitext
 * @return string Filtered text string
 */
public function stripSectionName( $text ) {
	# Internal links: a piped link keeps its label, any other its target
	$pipedLinkPattern = '/\[\[:?([^[|]+)\|([^[]+)\]\]/';
	$plainLinkPattern = '/\[\[:?([^[]+)\|?\]\]/';
	$text = preg_replace( $pipedLinkPattern, '$2', $text );
	$text = preg_replace( $plainLinkPattern, '$1', $text );

	# External links keep their link text
	# @todo FIXME: Not tolerant to blank link text
	# I.E. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$protocols = $this->urlUtils->validProtocols();
	$externalLinkPattern = '/\[(?i:' . $protocols . ')([^ ]+?) ([^[]+)\]/';
	$text = preg_replace( $externalLinkPattern, '$2', $text );

	# Wikitext quotes (italics & bold) are parsed, which turns them into
	# tags that the next step removes
	$text = $this->doQuotes( $text );

	# Finally drop every HTML tag
	return StringUtils::delimiterReplace( '<', '>', '', $text );
}
6313
/**
 * Call a callback function on all regions of the given text that are not
 * inside strip markers, and replace those regions with the return value
 * of the callback. Strip markers themselves are copied through verbatim.
 *
 * @param string $s Text that may contain strip markers
 * @param callable $callback Receives one region as its only argument and
 *   returns the replacement string
 * @return string
 */
public function markerSkipCallback( $s, callable $callback ) {
	$result = '';
	$pos = 0;
	$length = strlen( $s );
	$suffixLength = strlen( self::MARKER_SUFFIX );

	while ( $pos < $length ) {
		$start = strpos( $s, self::MARKER_PREFIX, $pos );
		if ( $start === false ) {
			// No further marker: the rest of the text is a single region
			$result .= call_user_func( $callback, substr( $s, $pos ) );
			break;
		}

		// Region in front of the marker (may be the empty string)
		$result .= call_user_func( $callback, substr( $s, $pos, $start - $pos ) );

		$end = strpos( $s, self::MARKER_SUFFIX, $start );
		if ( $end === false ) {
			// Unterminated marker: copy the remainder without calling back
			$result .= substr( $s, $start );
			break;
		}

		// Copy the complete marker and resume scanning right after it
		$end += $suffixLength;
		$result .= substr( $s, $start, $end - $start );
		$pos = $end;
	}

	return $result;
}
6355
/**
 * Remove any strip markers found in the given text.
 *
 * Delegates to the strip state of the current parse.
 *
 * @param string $text
 * @return string
 */
public function killMarkers( $text ) {
	return $this->mStripState->killMarkers( $text );
}
6366
/**
 * Parse a width parameter of an image link, such as 300px or 200x300px.
 *
 * @param string $value
 * @param bool $parseHeight Whether the WIDTHxHEIGHT form is accepted
 * @param bool $localized When false, $value is first matched against the
 *   'img_width' magic word and replaced by the matched inner value
 * @return array Has a 'width' key and, for the WIDTHxHEIGHT form, a
 *   'height' key (both int); empty when $value is '' or not recognized
 */
public function parseWidthParam( $value, $parseHeight = true, bool $localized = false ) {
	if ( $value === '' ) {
		return [];
	}

	if ( !$localized ) {
		// Strip a localized 'px' suffix (T374311)
		$mwArray = $this->magicWordFactory->newArray( [ 'img_width' ] );
		[ $magicWord, $newValue ] = $mwArray->matchVariableStartToEnd( $value );
		if ( $magicWord ) {
			$value = $newValue;
		}
	}

	# (T15500) Both forms (width/height and width only) tolerate a
	# trailing "px" for backward compatibility.
	$matches = [];
	if ( $parseHeight && preg_match( '/^([0-9]*)x([0-9]*)\s*(px)?\s*$/', $value, $matches ) ) {
		$dimensions = [
			'width' => intval( $matches[1] ),
			'height' => intval( $matches[2] ),
		];
		$sawTrailingPx = $matches[3] ?? false;
	} elseif ( preg_match( '/^([0-9]*)\s*(px)?\s*$/', $value, $matches ) ) {
		$dimensions = [ 'width' => intval( $matches[1] ) ];
		$sawTrailingPx = $matches[2] ?? false;
	} else {
		return [];
	}

	// A "px" that is still present at this point gets the page tracked
	if ( $sawTrailingPx ) {
		$this->addTrackingCategory( 'double-px-category' );
	}

	return $dimensions;
}
6412
/**
 * Lock the current instance of the parser.
 *
 * The lock is released (mInParse reset to false) by the callback wrapped
 * in the returned ScopedCallback.
 *
 * @return ScopedCallback
 * @throws LogicException If the parser is already locked
 */
protected function lock() {
	if ( $this->mInParse ) {
		throw new LogicException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively? Lock is held by: " . $this->mInParse );
	}

	// Remember the backtrace of the lock owner, so that a second attempt to
	// lock can report who is holding the lock
	$this->mInParse = ( new RuntimeException )->getTraceAsString();

	return new ScopedCallback( function () {
		$this->mInParse = false;
	} );
}
6438
/**
 * Will entry points such as parse() throw an exception due to the parser
 * already being active?
 *
 * @return bool True while the lock taken by lock() is held
 */
public function isLocked() {
	return (bool)$this->mInParse;
}
6449
/**
 * Strip the outer <p></p> tag from the HTML source of a single paragraph.
 *
 * Input that is not exactly one paragraph (no enclosing <p>...</p>, or
 * another </p> inside it) is returned unchanged.
 *
 * @param string $html
 * @return string
 */
public static function stripOuterParagraph( $html ) {
	$matches = [];
	$isSingleParagraph = preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $matches )
		&& strpos( $matches[1], '</p>' ) === false;

	return $isSingleParagraph ? $matches[1] : $html;
}
6468
/**
 * Add HTML tags marking the parts of a page title, to be displayed in the
 * first heading of the page.
 *
 * Each argument is converted to HTML with HtmlArmor::getHtml().
 *
 * @param string|HtmlArmor $nsText Namespace part; when it is '' neither the
 *   namespace nor the separator span is emitted
 * @param string|HtmlArmor $nsSeparator Separator between namespace and main part
 * @param string|HtmlArmor $mainText Main part of the title
 * @return string HTML
 */
public static function formatPageTitle( $nsText, $nsSeparator, $mainText ): string {
	$spans = [];
	if ( $nsText !== '' ) {
		$spans[] = '<span class="mw-page-title-namespace">' . HtmlArmor::getHtml( $nsText ) . '</span>';
		$spans[] = '<span class="mw-page-title-separator">' . HtmlArmor::getHtml( $nsSeparator ) . '</span>';
	}
	$spans[] = '<span class="mw-page-title-main">' . HtmlArmor::getHtml( $mainText ) . '</span>';

	return implode( '', $spans );
}
6488
/**
 * Strip everything but the <body> from the provided string.
 *
 * Removes everything up to and including the first opening <body ...> tag,
 * and a trailing </body></html> pair. Text without those wrappers is
 * returned unchanged.
 *
 * @param string $text HTML document source
 * @return string The content of the body
 */
public static function extractBody( string $text ): string {
	// preg_replace() returns null when PCRE fails (for instance when the
	// backtrack limit is hit on a very large document). Falling back to the
	// input of the failed step keeps the text, instead of handing null to
	// the next call and silently returning an empty string.
	$text = preg_replace( '!^.*?<body[^>]*>!s', '', $text, 1 ) ?? $text;
	$text = preg_replace( '!</body>\s*</html>\s*$!', '', $text, 1 ) ?? $text;
	return $text;
}
6500
/**
 * Sets up the PHP implementation of OOUI for use in this request and
 * flags the current ParserOutput as needing OOUI.
 *
 * @deprecated since 1.35; calling this emits a deprecation warning
 */
public function enableOOUI() {
	wfDeprecated( __METHOD__, '1.35' );

	OutputPage::setupOOUI();
	$this->mOutput->setEnableOOUI( true );
}
6513
/**
 * Set an output flag on the current ParserOutput and write a debug log
 * entry naming the flag, the page and the reason.
 *
 * @param string $flag Flag passed on to ParserOutput::setOutputFlag()
 * @param string $reason Explanation, only used in the debug log
 */
private function setOutputFlag( string $flag, string $reason ): void {
	$this->mOutput->setOutputFlag( $flag );

	// The prefixed title text identifies the affected page in the log
	$name = $this->getTitle()->getPrefixedText();
	$this->logger->debug( __METHOD__ . ": set $flag flag on '$name'; $reason" );
}
6525}
6526
// Keep the un-namespaced class name 'Parser' usable as an alias of
// MediaWiki\Parser\Parser.
class_alias( Parser::class, 'Parser' );
const OT_WIKI
Definition Defines.php:159
const NS_FILE
Definition Defines.php:71
const NS_MEDIAWIKI
Definition Defines.php:73
const NS_TEMPLATE
Definition Defines.php:75
const NS_SPECIAL
Definition Defines.php:54
const OT_PLAIN
Definition Defines.php:161
const OT_PREPROCESS
Definition Defines.php:160
const OT_HTML
Definition Defines.php:158
const NS_MEDIA
Definition Defines.php:53
const NS_CATEGORY
Definition Defines.php:79
wfEscapeWikiText( $input)
Escapes the given text so that it may be output using addWikiText() without any linking,...
wfDeprecatedMsg( $msg, $version=false, $component=false, $callerOffset=2)
Log a deprecation warning with arbitrary message text.
wfGetUrlUtils()
wfHostname()
Get host name of the current machine, for use in error reporting.
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest. If source is NULL, it just returns the val...
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
Title null $mTitle
array $params
The job parameters.
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:81
Expansions of core magic variables, used by the parser.
Various core parser functions, registered in every Parser.
Various tag hooks, registered in every Parser.
static register(Parser $parser, ServiceOptions $options)
const REGISTER_OPTIONS
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition File.php:76
Marks HTML that shouldn't be escaped.
Definition HtmlArmor.php:30
Class for exceptions thrown by ImageGalleryBase::factory().
Store key-value entries in a size-limited in-memory LRU cache.
Base media handler class.
Helper class for mapping value objects representing basic entities to cache keys.
This class performs some operations related to tracking categories, such as adding a tracking categor...
A class for passing options to services.
assertRequiredOptions(array $expectedKeys)
Assert that the list of options provided in this instance exactly match $expectedKeys,...
Content object implementation for representing flat text.
Group all the pieces relevant to the context of a request into one instance.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
This class is a collection of static functions that serve two purposes:
Definition Html.php:56
Factory creating MWHttpRequest objects.
Methods for dealing with language codes.
Base class for language-specific code.
Definition Language.php:78
Variant of the Message class.
An interface for creating language converters.
isConversionDisabled()
Whether to disable language variant conversion.
A service that provides utilities to do with language names and codes.
Factory to create LinkRender objects.
Class that generates HTML for internal links.
Some internal bits split off from Skin.php.
Definition Linker.php:63
A class containing constants representing the names of configuration variables.
const EnableParserLimitReporting
Name constant for the EnableParserLimitReporting setting, for use with Config::get()
const MaxSigChars
Name constant for the MaxSigChars setting, for use with Config::get()
const ServerName
Name constant for the ServerName setting, for use with Config::get()
const ParserEnableUserLanguage
Name constant for the ParserEnableUserLanguage setting, for use with Config::get()
const AllowSlowParserFunctions
Name constant for the AllowSlowParserFunctions setting, for use with Config::get()
const AllowDisplayTitle
Name constant for the AllowDisplayTitle setting, for use with Config::get()
const StylePath
Name constant for the StylePath setting, for use with Config::get()
const MaxTocLevel
Name constant for the MaxTocLevel setting, for use with Config::get()
const Localtimezone
Name constant for the Localtimezone setting, for use with Config::get()
const Server
Name constant for the Server setting, for use with Config::get()
const FragmentMode
Name constant for the FragmentMode setting, for use with Config::get()
const EnableScaryTranscluding
Name constant for the EnableScaryTranscluding setting, for use with Config::get()
const TranscludeCacheExpiry
Name constant for the TranscludeCacheExpiry setting, for use with Config::get()
const Sitename
Name constant for the Sitename setting, for use with Config::get()
const ArticlePath
Name constant for the ArticlePath setting, for use with Config::get()
const ScriptPath
Name constant for the ScriptPath setting, for use with Config::get()
const ParserEnableLegacyMediaDOM
Name constant for the ParserEnableLegacyMediaDOM setting, for use with Config::get()
const SignatureValidation
Name constant for the SignatureValidation setting, for use with Config::get()
const MiserMode
Name constant for the MiserMode setting, for use with Config::get()
const RawHtml
Name constant for the RawHtml setting, for use with Config::get()
const PreprocessorCacheThreshold
Name constant for the PreprocessorCacheThreshold setting, for use with Config::get()
const ExtraInterlanguageLinkPrefixes
Name constant for the ExtraInterlanguageLinkPrefixes setting, for use with Config::get()
const ShowHostnames
Name constant for the ShowHostnames setting, for use with Config::get()
Service locator for MediaWiki core services.
The Message class deals with fetching and processing of interface message into a variety of formats.
Definition Message.php:150
This is one of the Core classes and should be read at least once by any new developers.
Class for handling an array of magic words.
Store information about magic words, and create/cache MagicWord objects.
ParserOutput is a rendering of a Content object or a message.
PHP Parser - Processes wiki markup (which uses a more user-friendly syntax, such as "[[link]]" for ma...
Definition Parser.php:155
guessLegacySectionNameFromWikiText( $text)
Same as guessSectionNameFromWikiText(), but produces legacy anchors instead, if possible.
Definition Parser.php:6241
$mExpensiveFunctionCount
Number of expensive parser function calls.
Definition Parser.php:305
getTargetLanguageConverter()
Shorthand for getting a Language Converter for Target language.
Definition Parser.php:1659
setOutputType( $ot)
Mutator for the output type.
Definition Parser.php:1085
getBadFileLookup()
Get the BadFileLookup instance that this Parser is using.
Definition Parser.php:1260
stripSectionName( $text)
Strips a text string of wikitext for use in a section anchor.
Definition Parser.php:6294
getDefaultSort()
Accessor for the 'defaultsort' page property.
Definition Parser.php:6187
limitationWarn( $limitationType, $current='', $max='')
Warn the user when a parser limitation is reached. Will warn the user at most once per limitation type...
Definition Parser.php:2995
makeImage(LinkTarget $link, $options, $holders=false)
Parse image options text and use it to make an image.
Definition Parser.php:5369
const OT_PLAIN
Output type: like Parser::extractSections() - portions of the original are returned unchanged.
Definition Parser.php:202
static guessSectionNameFromStrippedText( $text)
Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
Definition Parser.php:6254
static statelessFetchTemplate( $page, $parser=false)
Static function to get a template Can be overridden via ParserOptions::setTemplateCallback().
Definition Parser.php:3648
markerSkipCallback( $s, callable $callback)
Call a callback function on all regions of the given text that are not inside strip markers,...
Definition Parser.php:6332
getPreloadText( $text, PageReference $page, ParserOptions $options, $params=[])
Process the wikitext for the "?preload=" feature.
Definition Parser.php:983
getLinkRenderer()
Get a LinkRenderer instance to make links with.
Definition Parser.php:1225
parse( $text, PageReference $page, ParserOptions $options, $linestart=true, $clearState=true, $revid=null)
Convert wikitext to HTML. Do not call this function recursively.
Definition Parser.php:681
tagNeedsNowikiStrippedInTagPF(string $lowerTagName)
Definition Parser.php:3962
doBlockLevels( $text, $linestart)
Make lists from lines starting with ':', '*', '#', etc.
Definition Parser.php:2838
getMagicWordFactory()
Get the MagicWordFactory that this Parser is using.
Definition Parser.php:1240
lock()
Lock the current instance of the parser.
Definition Parser.php:6421
setFunctionHook( $id, callable $callback, $flags=0)
Create a function, e.g.
Definition Parser.php:5037
const EXT_LINK_URL_CLASS
Everything except bracket, space, or control characters.
Definition Parser.php:170
msg(string $msg,... $args)
Helper function to correctly set the target language and title of a message based on the parser conte...
Definition Parser.php:4180
preprocess( $text, ?PageReference $page, ParserOptions $options, $revid=null, $frame=false)
Expand templates and variables in the text, producing valid, static wikitext.
Definition Parser.php:936
firstCallInit()
Used to do various kinds of initialisation on the first call of the parser.
Definition Parser.php:601
guessSectionNameFromWikiText( $text)
Try to guess the section anchor name based on a wikitext fragment presumably extracted from a heading...
Definition Parser.php:6224
getUserSig(UserIdentity $user, $nickname=false, $fancySig=null)
Fetch the user's signature text, if any, and normalize to validated, ready-to-insert wikitext.
Definition Parser.php:4728
setTitle(Title $t=null)
Set the context title.
Definition Parser.php:1016
interwikiTransclude(LinkTarget $link, $action)
Transclude an interwiki link.
Definition Parser.php:3850
makeLimitReport()
Set the limit report data in the current ParserOutput.
Definition Parser.php:780
validateSig( $text)
Check that the user's signature contains no bad XML.
Definition Parser.php:4795
isCurrentRevisionOfTitleCached(LinkTarget $link)
Definition Parser.php:3567
getRevisionId()
Get the ID of the revision we are parsing.
Definition Parser.php:6041
renderImageGallery( $text, array $params)
Renders an image gallery from a text with one line per image.
Definition Parser.php:5121
argSubstitution(array $piece, PPFrame $frame)
Triple brace replacement – used for template arguments.
Definition Parser.php:3921
replaceSection( $oldText, $sectionId, $newText)
This function returns $oldtext after the content of the section specified by $section has been replac...
Definition Parser.php:5954
transformMsg( $text, ParserOptions $options, ?PageReference $page=null)
Wrapper for preprocess()
Definition Parser.php:4937
insertStripItem( $text)
Add an item to the strip state Returns the unique tag which must be inserted into the stripped text T...
Definition Parser.php:1367
internalParse( $text, $isMain=true, $frame=false)
Helper function for parse() that transforms wiki markup into half-parsed HTML.
Definition Parser.php:1590
static normalizeLinkUrl( $url)
Replace unusual escape codes in a URL with their equivalent characters.
Definition Parser.php:2325
replaceLinkHolders(&$text)
Replace "<!--LINK-->" link placeholders with actual links, in the buffer Placeholders created in Link...
Definition Parser.php:5082
static extractTagsAndParams(array $elements, $text, &$matches)
Replaces all occurrences of HTML-style comments and the given tags in the text with a random marker a...
Definition Parser.php:1283
static statelessFetchRevisionRecord(LinkTarget $link, $parser=null)
Wrapper around RevisionLookup::getKnownCurrentRevision.
Definition Parser.php:3583
getHookRunner()
Get a HookRunner for calling core hooks.
Definition Parser.php:1695
getContentLanguage()
Get the content language that this Parser is using.
Definition Parser.php:1250
getExternalLinkAttribs( $url)
Get an associative array of additional HTML attributes appropriate for a particular external link.
Definition Parser.php:2292
parseWidthParam( $value, $parseHeight=true, bool $localized=false)
Parse a width parameter of an image link, such as 300px or 200x300px.
Definition Parser.php:6380
setPage(?PageReference $t=null)
Set the page used as context for parsing, e.g.
Definition Parser.php:1035
setOptions(ParserOptions $options)
Mutator for the ParserOptions object.
Definition Parser.php:1135
preSaveTransform( $text, PageReference $page, UserIdentity $user, ParserOptions $options, $clearState=true)
Transform wiki markup when saving a page by doing "\\r\\n" -> "\\n" conversion, substituting signatur...
Definition Parser.php:4607
killMarkers( $text)
Remove any strip markers found in the given text.
Definition Parser.php:6363
const OT_PREPROCESS
Output type: like Parser::preprocess()
Definition Parser.php:197
cleanSig( $text, $parsing=false)
Clean up signature text.
Definition Parser.php:4810
isLocked()
Will entry points such as parse() throw an exception due to the parser already being active?
Definition Parser.php:6446
getTemplateDom(LinkTarget $title)
Get the semi-parsed DOM representation of a template with a given title, and its redirect destination...
Definition Parser.php:3492
getRevisionUser()
Get the name of the user that edited the last revision.
Definition Parser.php:6134
getFlatSectionInfo( $text)
Get an array of preprocessor section information.
Definition Parser.php:5987
getTargetLanguage()
Get the target language for the content being parsed.
Definition Parser.php:1186
clearState()
Clear Parser state.
Definition Parser.php:613
getFunctionHooks()
Get all registered function hook identifiers.
Definition Parser.php:5071
enableOOUI()
Sets up the PHP implementation of OOUI for use in this request and instructs OutputPage to enable OO...
Definition Parser.php:6508
braceSubstitution(array $piece, PPFrame $frame)
Return the text of a template, after recursively replacing any variables or templates within the temp...
Definition Parser.php:3020
getUserIdentity()
Get a user either from the user set on Parser if it's set, or from the ParserOptions object otherwise...
Definition Parser.php:1205
static formatPageTitle( $nsText, $nsSeparator, $mainText)
Add HTML tags marking the parts of a page title, to be displayed in the first heading of the page.
Definition Parser.php:6479
setUser(?UserIdentity $user)
Set the current user.
Definition Parser.php:1005
Options( $x=null)
Accessor/mutator for the ParserOptions object.
Definition Parser.php:1146
getHookContainer()
Get a HookContainer capable of returning metadata about hooks or running extension hooks.
Definition Parser.php:1683
getOutputType()
Accessor for the output type.
Definition Parser.php:1076
recursivePreprocess( $text, $frame=false)
Recursive parser entry point that can be called from an extension tag hook.
Definition Parser.php:963
getRevisionSize()
Get the size of the revision.
Definition Parser.php:6158
getPreprocessor()
Get a preprocessor object.
Definition Parser.php:1215
getStripList()
Get a list of strippable XML-like elements.
Definition Parser.php:1346
extensionSubstitution(array $params, PPFrame $frame, bool $processNowiki=false)
Return the text to be used for a given extension tag.
Definition Parser.php:3986
setHook( $tag, callable $callback)
Create an HTML-style tag, e.g.
Definition Parser.php:4971
preprocessToDom( $text, $flags=0)
Get the document object model for the given wikitext.
Definition Parser.php:2918
getSection( $text, $sectionId, $defaultText='')
This function returns the text of a section, specified by a number ($section).
Definition Parser.php:5937
const OT_WIKI
Output type: like Parser::preSaveTransform()
Definition Parser.php:195
fetchTemplateAndTitle(LinkTarget $link)
Fetch the unparsed text of a template and register a reference to it.
Definition Parser.php:3606
static getExternalLinkRel( $url=false, LinkTarget $title=null)
Get the rel attribute for a particular external link.
Definition Parser.php:2266
static stripOuterParagraph( $html)
Strip the outer <p></p> tag from the HTML source of a single paragraph.
Definition Parser.php:6460
getRevisionRecordObject()
Get the revision record object for $this->mRevisionId.
Definition Parser.php:6051
parseExtensionTagAsTopLevelDoc( $text)
Needed by Parsoid/PHP to ensure all the hooks for extensions are run in the right order.
Definition Parser.php:917
OutputType( $x=null)
Accessor/mutator for the output type.
Definition Parser.php:1103
clearTagHooks()
Remove all tag hooks.
Definition Parser.php:4989
modifyImageHtml(File $file, array $params, string &$html)
Give hooks a chance to modify image thumbnail HTML.
Definition Parser.php:5672
static extractBody(string $text)
Strip everything but the <body> from the provided string.
Definition Parser.php:6495
getRevisionTimestamp()
Get the timestamp associated with the current revision, adjusted for the default server-local timesta...
Definition Parser.php:6106
__clone()
Allow extensions to clean up when the parser is cloned.
Definition Parser.php:585
static cleanSigInSig( $text)
Strip 3, 4 or 5 tildes out of signatures.
Definition Parser.php:4852
callParserFunction(PPFrame $frame, $function, array $args=[])
Call a parser function and return an array with text and flags.
Definition Parser.php:3402
__construct(ServiceOptions $svcOptions, MagicWordFactory $magicWordFactory, Language $contLang, ParserFactory $factory, UrlUtils $urlUtils, SpecialPageFactory $spFactory, LinkRendererFactory $linkRendererFactory, NamespaceInfo $nsInfo, LoggerInterface $logger, BadFileLookup $badFileLookup, LanguageConverterFactory $languageConverterFactory, LanguageNameUtils $languageNameUtils, HookContainer $hookContainer, TidyDriverBase $tidy, WANObjectCache $wanCache, UserOptionsLookup $userOptionsLookup, UserFactory $userFactory, TitleFormatter $titleFormatter, HttpRequestFactory $httpRequestFactory, TrackingCategories $trackingCategories, SignatureValidatorFactory $signatureValidatorFactory, UserNameUtils $userNameUtils)
Constructing parsers directly is not allowed! Use a ParserFactory.
Definition Parser.php:472
attributeStripCallback(&$text, $frame=false)
Callback from the Sanitizer for expanding items found in HTML attribute values, so they can be safely...
Definition Parser.php:5743
__destruct()
Reduce memory usage to reduce the impact of circular references.
Definition Parser.php:570
recursiveTagParse( $text, $frame=false)
Half-parse wikitext to half-parsed HTML.
Definition Parser.php:868
replaceVariables( $text, $frame=false, $argsOnly=false)
Replace magic variables, templates, and template arguments with the appropriate text.
Definition Parser.php:2943
doQuotes( $text)
Helper function for handleAllQuotes()
Definition Parser.php:1995
static replaceTableOfContentsMarker( $text, $toc)
Replace table of contents marker in parsed HTML.
Definition Parser.php:4873
const OT_HTML
Output type: like Parser::parse()
Definition Parser.php:193
recursiveTagParseFully( $text, $frame=false)
Fully parse wikitext to fully parsed HTML.
Definition Parser.php:892
fetchFileNoRegister(LinkTarget $link, array $options=[])
Helper function for fetchFileAndTitle.
Definition Parser.php:3827
getPage()
Returns the page used as context for parsing, e.g.
Definition Parser.php:1058
fetchFileAndTitle(LinkTarget $link, array $options=[])
Fetch a file and its title and register a reference to it.
Definition Parser.php:3800
fetchCurrentRevisionRecordOfTitle(LinkTarget $link)
Fetch the current revision of a given title as a RevisionRecord.
Definition Parser.php:3537
startExternalParse(?PageReference $page, ParserOptions $options, $outputType, $clearState=true, $revId=null)
Set up some variables which are usually set up in parse() so that an external function can call some ...
Definition Parser.php:4902
getFunctionLang()
Get a language object for use in parser functions such as {{FORMATNUM:}}.
Definition Parser.php:1173
resetOutput()
Reset the ParserOutput.
Definition Parser.php:658
static removeSomeTags(string $text, array $options=[])
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments; the result will alw...
WebRequest clone which takes values from a provided array.
Exception representing a failure to look up a revision.
Page revision base class.
Value object representing a content slot associated with a page revision.
Factory for handling the special page list and generating SpecialPage objects.
Parent class for all special pages.
Base class for HTML cleanup utilities.
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
A codec for MediaWiki page titles.
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
Represents a title within MediaWiki.
Definition Title.php:78
Provides access to user options.
Creates User objects.
UserNameUtils service.
internal since 1.36
Definition User.php:93
Library for creating and parsing MW-style timestamps.
A service to expand, parse, and otherwise manipulate URLs.
Definition UrlUtils.php:16
validProtocols()
Returns a partial regular expression of recognized URL protocols, e.g.
Definition UrlUtils.php:354
Module of static functions for generating XML.
Definition Xml.php:37
Set options of the Parser.
getExpensiveParserFunctionLimit()
Maximum number of calls per parse to expensive parser functions.
getPreSaveTransform()
Transform wiki markup when saving the page?
getDisableTitleConversion()
Whether title conversion should be disabled.
Differences from DOM schema:
const DOM_FOR_INCLUSION
Transclusion mode flag for Preprocessor::preprocessToObj()
Arbitrary section name based PHP profiling.
A collection of static methods to play with strings.
static replaceMarkup( $search, $replace, $text)
More or less "markup-safe" str_replace() Ignores any instances of the separator inside <....
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
Multi-datacenter aware caching interface.
return[0=> 'ـ', 1=> ' ', 2=> '`', 3=> '´', 4=> '˜', 5=> '^', 6=> '¯', 7=> '‾', 8=> '˘', 9=> '˙', 10=> '¨', 11=> '˚', 12=> '˝', 13=> '᾽', 14=> '῝', 15=> '¸', 16=> '˛', 17=> '_', 18=> '‗', 19=> '῀', 20=> '﮲', 21=> '﮳', 22=> '﮴', 23=> '﮵', 24=> '﮶', 25=> '﮷', 26=> '﮸', 27=> '﮹', 28=> '﮺', 29=> '﮻', 30=> '﮼', 31=> '﮽', 32=> '﮾', 33=> '﮿', 34=> '﯀', 35=> '﯁', 36=> '゛', 37=> '゜', 38=> '-', 39=> '֊', 40=> '᐀', 41=> '᭠', 42=> '᠆', 43=> '᠇', 44=> '‐', 45=> '‒', 46=> '–', 47=> '—', 48=> '―', 49=> '⁓', 50=> '⸗', 51=> '゠', 52=> '・', 53=> ',', 54=> '՝', 55=> '،', 56=> '؍', 57=> '٫', 58=> '٬', 59=> '߸', 60=> '᠂', 61=> '᠈', 62=> '꓾', 63=> '꘍', 64=> '꛵', 65=> '︑', 66=> ';', 67=> '؛', 68=> '⁏', 69=> '꛶', 70=> ':', 71=> '։', 72=> '؞', 73=> '܃', 74=> '܄', 75=> '܅', 76=> '܆', 77=> '܇', 78=> '܈', 79=> '࠰', 80=> '࠱', 81=> '࠲', 82=> '࠳', 83=> '࠴', 84=> '࠵', 85=> '࠶', 86=> '࠷', 87=> '࠸', 88=> '࠹', 89=> '࠺', 90=> '࠻', 91=> '࠼', 92=> '࠽', 93=> '࠾', 94=> '፡', 95=> '፣', 96=> '፤', 97=> '፥', 98=> '፦', 99=> '᠄', 100=> '᠅', 101=> '༔', 102=> '៖', 103=> '᭝', 104=> '꧇', 105=> '᛫', 106=> '᛬', 107=> '᛭', 108=> '꛴', 109=> '!', 110=> '¡', 111=> '՜', 112=> '߹', 113=> '᥄', 114=> '?', 115=> '¿', 116=> '⸮', 117=> '՞', 118=> '؟', 119=> '܉', 120=> '፧', 121=> '᥅', 122=> '⳺', 123=> '⳻', 124=> '꘏', 125=> '꛷', 126=> '‽', 127=> '⸘', 128=> '.', 129=> '᠁', 130=> '۔', 131=> '܁', 132=> '܂', 133=> '።', 134=> '᠃', 135=> '᠉', 136=> '᙮', 137=> '᭜', 138=> '⳹', 139=> '⳾', 140=> '⸰', 141=> '꓿', 142=> '꘎', 143=> '꛳', 144=> '︒', 145=> '·', 146=> '⸱', 147=> '।', 148=> '॥', 149=> '꣎', 150=> '꣏', 151=> '᰻', 152=> '᰼', 153=> '꡶', 154=> '꡷', 155=> '᜵', 156=> '᜶', 157=> '꤯', 158=> '၊', 159=> '။', 160=> '។', 161=> '៕', 162=> '᪨', 163=> '᪩', 164=> '᪪', 165=> '᪫', 166=> '᭞', 167=> '᭟', 168=> '꧈', 169=> '꧉', 170=> '꩝', 171=> '꩞', 172=> '꩟', 173=> '꯫', 174=> '𐩖', 175=> '𐩗', 176=> '𑁇', 177=> '𑁈', 178=> '𑃀', 179=> '𑃁', 180=> '᱾', 181=> '᱿', 182=> '܀', 183=> '߷', 184=> '჻', 185=> '፠', 186=> '፨', 187=> '᨞', 188=> '᨟', 189=> '᭚', 190=> '᭛', 
191=> '꧁', 192=> '꧂', 193=> '꧃', 194=> '꧄', 195=> '꧅', 196=> '꧆', 197=> '꧊', 198=> '꧋', 199=> '꧌', 200=> '꧍', 201=> '꛲', 202=> '꥟', 203=> '𐡗', 204=> '𐬺', 205=> '𐬻', 206=> '𐬼', 207=> '𐬽', 208=> '𐬾', 209=> '𐬿', 210=> '𑂾', 211=> '𑂿', 212=> '⁕', 213=> '⁖', 214=> '⁘', 215=> '⁙', 216=> '⁚', 217=> '⁛', 218=> '⁜', 219=> '⁝', 220=> '⁞', 221=> '⸪', 222=> '⸫', 223=> '⸬', 224=> '⸭', 225=> '⳼', 226=> '⳿', 227=> '⸙', 228=> '𐤿', 229=> '𐄀', 230=> '𐄁', 231=> '𐄂', 232=> '𐎟', 233=> '𐏐', 234=> '𐤟', 235=> '𒑰', 236=> '𒑱', 237=> '𒑲', 238=> '𒑳', 239=> '\'', 240=> '‘', 241=> '’', 242=> '‚', 243=> '‛', 244=> '‹', 245=> '›', 246=> '"', 247 => '“', 248 => '”', 249 => '„', 250 => '‟', 251 => '«', 252 => '»', 253 => '(', 254 => ')', 255 => '[', 256 => ']', 257 => '{', 258 => '}', 259 => '༺', 260 => '༻', 261 => '༼', 262 => '༽', 263 => '᚛', 264 => '᚜', 265 => '⁅', 266 => '⁆', 267 => '⧼', 268 => '⧽', 269 => '⦃', 270 => '⦄', 271 => '⦅', 272 => '⦆', 273 => '⦇', 274 => '⦈', 275 => '⦉', 276 => '⦊', 277 => '⦋', 278 => '⦌', 279 => '⦍', 280 => '⦎', 281 => '⦏', 282 => '⦐', 283 => '⦑', 284 => '⦒', 285 => '⦓', 286 => '⦔', 287 => '⦕', 288 => '⦖', 289 => '⦗', 290 => '⦘', 291 => '⟬', 292 => '⟭', 293 => '⟮', 294 => '⟯', 295 => '⸂', 296 => '⸃', 297 => '⸄', 298 => '⸅', 299 => '⸉', 300 => '⸊', 301 => '⸌', 302 => '⸍', 303 => '⸜', 304 => '⸝', 305 => '⸠', 306 => '⸡', 307 => '⸢', 308 => '⸣', 309 => '⸤', 310 => '⸥', 311 => '⸦', 312 => '⸧', 313 => '⸨', 314 => '⸩', 315 => '〈', 316 => '〉', 317 => '「', 318 => '」', 319 => '﹝', 320 => '﹞', 321 => '︗', 322 => '︘', 323 => '﴾', 324 => '﴿', 325 => '§', 326 => '¶', 327 => '⁋', 328 => '©', 329 => '®', 330 => '@', 331 => '*', 332 => '⁎', 333 => '⁑', 334 => '٭', 335 => '꙳', 336 => '/', 337 => '⁄', 338 => '\\', 339 => '&', 340 => '⅋', 341 => '⁊', 342 => '#', 343 => '%', 344 => '٪', 345 => '‰', 346 => '؉', 347 => '‱', 348 => '؊', 349 => '⁒', 350 => '†', 351 => '‡', 352 => '•', 353 => '‣', 354 => '‧', 355 => '⁃', 356 => '⁌', 357 => '⁍', 358 => '′', 359 => '‵', 360 => '‸', 361 => '※', 
362 => '‿', 363 => '⁔', 364 => '⁀', 365 => '⁐', 366 => '⁁', 367 => '⁂', 368 => '⸀', 369 => '⸁', 370 => '⸆', 371 => '⸇', 372 => '⸈', 373 => '⸋', 374 => '⸎', 375 => '⸏', 376 => '⸐', 377 => '⸑', 378 => '⸒', 379 => '⸓', 380 => '⸔', 381 => '⸕', 382 => '⸖', 383 => '⸚', 384 => '⸛', 385 => '⸞', 386 => '⸟', 387 => '꙾', 388 => '՚', 389 => '՛', 390 => '՟', 391 => '־', 392 => '׀', 393 => '׃', 394 => '׆', 395 => '׳', 396 => '״', 397 => '܊', 398 => '܋', 399 => '܌', 400 => '܍', 401 => '࡞', 402 => '᠀', 403 => '॰', 404 => '꣸', 405 => '꣹', 406 => '꣺', 407 => '෴', 408 => '๚', 409 => '๛', 410 => '꫞', 411 => '꫟', 412 => '༄', 413 => '༅', 414 => '༆', 415 => '༇', 416 => '༈', 417 => '༉', 418 => '༊', 419 => '࿐', 420 => '࿑', 421 => '་', 422 => '།', 423 => '༎', 424 => '༏', 425 => '༐', 426 => '༑', 427 => '༒', 428 => '྅', 429 => '࿒', 430 => '࿓', 431 => '࿔', 432 => '࿙', 433 => '࿚', 434 => '᰽', 435 => '᰾', 436 => '᰿', 437 => '᥀', 438 => '၌', 439 => '၍', 440 => '၎', 441 => '၏', 442 => '႞', 443 => '႟', 444 => '꩷', 445 => '꩸', 446 => '꩹', 447 => 'ៗ', 448 => '៘', 449 => '៙', 450 => '៚', 451 => '᪠', 452 => '᪡', 453 => '᪢', 454 => '᪣', 455 => '᪤', 456 => '᪥', 457 => '᪦', 458 => '᪬', 459 => '᪭', 460 => '᙭', 461 => '⵰', 462 => '꡴', 463 => '꡵', 464 => '᯼', 465 => '᯽', 466 => '᯾', 467 => '᯿', 468 => '꤮', 469 => '꧞', 470 => '꧟', 471 => '꩜', 472 => '𑁉', 473 => '𑁊', 474 => '𑁋', 475 => '𑁌', 476 => '𑁍', 477 => '𐩐', 478 => '𐩑', 479 => '𐩒', 480 => '𐩓', 481 => '𐩔', 482 => '𐩕', 483 => '𐩘', 484 => '𐬹', 485 => '𑂻', 486 => '𑂼', 487 => 'ʹ', 488 => '͵', 489 => 'ʺ', 490 => '˂', 491 => '˃', 492 => '˄', 493 => '˅', 494 => 'ˆ', 495 => 'ˇ', 496 => 'ˈ', 497 => 'ˉ', 498 => 'ˊ', 499 => 'ˋ', 500 => 'ˌ', 501 => 'ˍ', 502 => 'ˎ', 503 => 'ˏ', 504 => '˒', 505 => '˓', 506 => '˔', 507 => '˕', 508 => '˖', 509 => '˗', 510 => '˞', 511 => '˟', 512 => '˥', 513 => '˦', 514 => '˧', 515 => '˨', 516 => '˩', 517 => '˪', 518 => '˫', 519 => 'ˬ', 520 => '˭', 521 => '˯', 522 => '˰', 523 => '˱', 524 => '˲', 525 => '˳', 526 => '˴', 527 => '˵', 528 => 
'˶', 529 => '˷', 530 => '˸', 531 => '˹', 532 => '˺', 533 => '˻', 534 => '˼', 535 => '˽', 536 => '˾', 537 => '˿', 538 => '᎐', 539 => '᎑', 540 => '᎒', 541 => '᎓', 542 => '᎔', 543 => '᎕', 544 => '᎖', 545 => '᎗', 546 => '᎘', 547 => '᎙', 548 => '꜀', 549 => '꜁', 550 => '꜂', 551 => '꜃', 552 => '꜄', 553 => '꜅', 554 => '꜆', 555 => '꜇', 556 => '꜈', 557 => '꜉', 558 => '꜊', 559 => '꜋', 560 => '꜌', 561 => '꜍', 562 => '꜎', 563 => '꜏', 564 => '꜐', 565 => '꜑', 566 => '꜒', 567 => '꜓', 568 => '꜔', 569 => '꜕', 570 => '꜖', 571 => 'ꜗ', 572 => 'ꜘ', 573 => 'ꜙ', 574 => 'ꜚ', 575 => 'ꜛ', 576 => 'ꜜ', 577 => 'ꜝ', 578 => 'ꜞ', 579 => 'ꜟ', 580 => '꜠', 581 => '꜡', 582 => 'ꞈ', 583 => '꞉', 584 => '꞊', 585 => '°', 586 => '҂', 587 => '؈', 588 => '؎', 589 => '؏', 590 => '۞', 591 => '۩', 592 => '﷽', 593 => '߶', 594 => '৺', 595 => '୰', 596 => '௳', 597 => '௴', 598 => '௵', 599 => '௶', 600 => '௷', 601 => '௸', 602 => '௺', 603 => '౿', 604 => '൹', 605 => '꠨', 606 => '꠩', 607 => '꠪', 608 => '꠫', 609 => '꠶', 610 => '꠷', 611 => '꠹', 612 => '๏', 613 => '༁', 614 => '༂', 615 => '༃', 616 => '༓', 617 => '༕', 618 => '༖', 619 => '༗', 620 => '༚', 621 => '༛', 622 => '༜', 623 => '༝', 624 => '༞', 625 => '༟', 626 => '༴', 627 => '༶', 628 => '༸', 629 => '྾', 630 => '྿', 631 => '࿀', 632 => '࿁', 633 => '࿂', 634 => '࿃', 635 => '࿄', 636 => '࿅', 637 => '࿇', 638 => '࿈', 639 => '࿉', 640 => '࿊', 641 => '࿋', 642 => '࿌', 643 => '࿎', 644 => '࿏', 645 => '࿕', 646 => '࿖', 647 => '࿗', 648 => '࿘', 649 => '᧠', 650 => '᧡', 651 => '᧢', 652 => '᧣', 653 => '᧤', 654 => '᧥', 655 => '᧦', 656 => '᧧', 657 => '᧨', 658 => '᧩', 659 => '᧪', 660 => '᧫', 661 => '᧬', 662 => '᧭', 663 => '᧮', 664 => '᧯', 665 => '᧰', 666 => '᧱', 667 => '᧲', 668 => '᧳', 669 => '᧴', 670 => '᧵', 671 => '᧶', 672 => '᧷', 673 => '᧸', 674 => '᧹', 675 => '᧺', 676 => '᧻', 677 => '᧼', 678 => '᧽', 679 => '᧾', 680 => '᧿', 681 => '᭡', 682 => '᭢', 683 => '᭣', 684 => '᭤', 685 => '᭥', 686 => '᭦', 687 => '᭧', 688 => '᭨', 689 => '᭩', 690 => '᭪', 691 => '᭴', 692 => '᭵', 693 => '᭶', 694 => '᭷', 
695 => '᭸', 696 => '᭹', 697 => '᭺', 698 => '᭻', 699 => '᭼', 700 => '℄', 701 => '℈', 702 => '℔', 703 => '℗', 704 => '℘', 705 => '℞', 706 => '℟', 707 => '℣', 708 => '℥', 709 => '℧', 710 => '℩', 711 => '℮', 712 => '℺', 713 => '⅁', 714 => '⅂', 715 => '⅃', 716 => '⅄', 717 => '⅊', 718 => '⅌', 719 => '⅍', 720 => '⅏', 721 => '←', 722 => '→', 723 => '↑', 724 => '↓', 725 => '↔', 726 => '↕', 727 => '↖', 728 => '↗', 729 => '↘', 730 => '↙', 731 => '↜', 732 => '↝', 733 => '↞', 734 => '↟', 735 => '↠', 736 => '↡', 737 => '↢', 738 => '↣', 739 => '↤', 740 => '↥', 741 => '↦', 742 => '↧', 743 => '↨', 744 => '↩', 745 => '↪', 746 => '↫', 747 => '↬', 748 => '↭', 749 => '↯', 750 => '↰', 751 => '↱', 752 => '↲', 753 => '↳', 754 => '↴', 755 => '↵', 756 => '↶', 757 => '↷', 758 => '↸', 759 => '↹', 760 => '↺', 761 => '↻', 762 => '↼', 763 => '↽', 764 => '↾', 765 => '↿', 766 => '⇀', 767 => '⇁', 768 => '⇂', 769 => '⇃', 770 => '⇄', 771 => '⇅', 772 => '⇆', 773 => '⇇', 774 => '⇈', 775 => '⇉', 776 => '⇊', 777 => '⇋', 778 => '⇌', 779 => '⇐', 780 => '⇑', 781 => '⇒', 782 => '⇓', 783 => '⇔', 784 => '⇕', 785 => '⇖', 786 => '⇗', 787 => '⇘', 788 => '⇙', 789 => '⇚', 790 => '⇛', 791 => '⇜', 792 => '⇝', 793 => '⇞', 794 => '⇟', 795 => '⇠', 796 => '⇡', 797 => '⇢', 798 => '⇣', 799 => '⇤', 800 => '⇥', 801 => '⇦', 802 => '⇧', 803 => '⇨', 804 => '⇩', 805 => '⇪', 806 => '⇫', 807 => '⇬', 808 => '⇭', 809 => '⇮', 810 => '⇯', 811 => '⇰', 812 => '⇱', 813 => '⇲', 814 => '⇳', 815 => '⇴', 816 => '⇵', 817 => '⇶', 818 => '⇷', 819 => '⇸', 820 => '⇹', 821 => '⇺', 822 => '⇻', 823 => '⇼', 824 => '⇽', 825 => '⇾', 826 => '⇿', 827 => '∀', 828 => '∁', 829 => '∂', 830 => '∃', 831 => '∅', 832 => '∆', 833 => '∇', 834 => '∈', 835 => '∊', 836 => '∋', 837 => '∍', 838 => '϶', 839 => '∎', 840 => '∏', 841 => '∐', 842 => '∑', 843 => '+', 844 => '±', 845 => '÷', 846 => '×', 847 => '<', 848 => '=', 849 => '>', 850 => '¬', 851 => '|', 852 => '¦', 853 => '‖', 854 => '~', 855 => '−', 856 => '∓', 857 => '∔', 858 => '∕', 859 => '∖', 860 => '∗', 861 => 
'∘', 862 => '∙', 863 => '√', 864 => '∛', 865 => '؆', 866 => '∜', 867 => '؇', 868 => '∝', 869 => '∞', 870 => '∟', 871 => '∠', 872 => '∡', 873 => '∢', 874 => '∣', 875 => '∥', 876 => '∧', 877 => '∨', 878 => '∩', 879 => '∪', 880 => '∫', 881 => '∮', 882 => '∱', 883 => '∲', 884 => '∳', 885 => '∴', 886 => '∵', 887 => '∶', 888 => '∷', 889 => '∸', 890 => '∹', 891 => '∺', 892 => '∻', 893 => '∼', 894 => '∽', 895 => '∾', 896 => '∿', 897 => '≀', 898 => '≂', 899 => '≃', 900 => '≅', 901 => '≆', 902 => '≈', 903 => '≊', 904 => '≋', 905 => '≌', 906 => '≍', 907 => '≎', 908 => '≏', 909 => '≐', 910 => '≑', 911 => '≒', 912 => '≓', 913 => '≔', 914 => '≕', 915 => '≖', 916 => '≗', 917 => '≘', 918 => '≙', 919 => '≚', 920 => '≛', 921 => '≜', 922 => '≝', 923 => '≞', 924 => '≟', 925 => '≡', 926 => '≣', 927 => '≤', 928 => '≥', 929 => '≦', 930 => '≧', 931 => '≨', 932 => '≩', 933 => '≪', 934 => '≫', 935 => '≬', 936 => '≲', 937 => '≳', 938 => '≶', 939 => '≷', 940 => '≺', 941 => '≻', 942 => '≼', 943 => '≽', 944 => '≾', 945 => '≿', 946 => '⊂', 947 => '⊃', 948 => '⊆', 949 => '⊇', 950 => '⊊', 951 => '⊋', 952 => '⊌', 953 => '⊍', 954 => '⊎', 955 => '⊏', 956 => '⊐', 957 => '⊑', 958 => '⊒', 959 => '⊓', 960 => '⊔', 961 => '⊕', 962 => '⊖', 963 => '⊗', 964 => '⊘', 965 => '⊙', 966 => '⊚', 967 => '⊛', 968 => '⊜', 969 => '⊝', 970 => '⊞', 971 => '⊟', 972 => '⊠', 973 => '⊡', 974 => '⊢', 975 => '⊣', 976 => '⊤', 977 => '⊥', 978 => '⊦', 979 => '⊧', 980 => '⊨', 981 => '⊩', 982 => '⊪', 983 => '⊫', 984 => '⊰', 985 => '⊱', 986 => '⊲', 987 => '⊳', 988 => '⊴', 989 => '⊵', 990 => '⊶', 991 => '⊷', 992 => '⊸', 993 => '⊹', 994 => '⊺', 995 => '⊻', 996 => '⊼', 997 => '⊽', 998 => '⊾', 999 => '⊿', 1000 => '⋀', 1001 => '⋁', 1002 => '⋂', 1003 => '⋃', 1004 => '⋄', 1005 => '⋅', 1006 => '⋆', 1007 => '⋇', 1008 => '⋈', 1009 => '⋉', 1010 => '⋊', 1011 => '⋋', 1012 => '⋌', 1013 => '⋍', 1014 => '⋎', 1015 => '⋏', 1016 => '⋐', 1017 => '⋑', 1018 => '⋒', 1019 => '⋓', 1020 => '⋔', 1021 => '⋕', 1022 => '⋖', 1023 => '⋗', 1024 => '⋘', 1025 => '⋙', 
1026 => '⋚', 1027 => '⋛', 1028 => '⋜', 1029 => '⋝', 1030 => '⋞', 1031 => '⋟', 1032 => '⋤', 1033 => '⋥', 1034 => '⋦', 1035 => '⋧', 1036 => '⋨', 1037 => '⋩', 1038 => '⋮', 1039 => '⋯', 1040 => '⋰', 1041 => '⋱', 1042 => '⋲', 1043 => '⋳', 1044 => '⋴', 1045 => '⋵', 1046 => '⋶', 1047 => '⋷', 1048 => '⋸', 1049 => '⋹', 1050 => '⋺', 1051 => '⋻', 1052 => '⋼', 1053 => '⋽', 1054 => '⋾', 1055 => '⋿', 1056 => '⌀', 1057 => '⌁', 1058 => '⌂', 1059 => '⌃', 1060 => '⌄', 1061 => '⌅', 1062 => '⌆', 1063 => '⌇', 1064 => '⌈', 1065 => '⌉', 1066 => '⌊', 1067 => '⌋', 1068 => '⌌', 1069 => '⌍', 1070 => '⌎', 1071 => '⌏', 1072 => '⌐', 1073 => '⌑', 1074 => '⌒', 1075 => '⌓', 1076 => '⌔', 1077 => '⌕', 1078 => '⌖', 1079 => '⌗', 1080 => '⌘', 1081 => '⌙', 1082 => '⌚', 1083 => '⌛', 1084 => '⌜', 1085 => '⌝', 1086 => '⌞', 1087 => '⌟', 1088 => '⌠', 1089 => '⌡', 1090 => '⌢', 1091 => '⌣', 1092 => '⌤', 1093 => '⌥', 1094 => '⌦', 1095 => '⌧', 1096 => '⌨', 1097 => '⌫', 1098 => '⌬', 1099 => '⌭', 1100 => '⌮', 1101 => '⌯', 1102 => '⌰', 1103 => '⌱', 1104 => '⌲', 1105 => '⌳', 1106 => '⌴', 1107 => '⌵', 1108 => '⌶', 1109 => '⌷', 1110 => '⌸', 1111 => '⌹', 1112 => '⌺', 1113 => '⌻', 1114 => '⌼', 1115 => '⌽', 1116 => '⌾', 1117 => '⌿', 1118 => '⍀', 1119 => '⍁', 1120 => '⍂', 1121 => '⍃', 1122 => '⍄', 1123 => '⍅', 1124 => '⍆', 1125 => '⍇', 1126 => '⍈', 1127 => '⍉', 1128 => '⍊', 1129 => '⍋', 1130 => '⍌', 1131 => '⍍', 1132 => '⍎', 1133 => '⍏', 1134 => '⍐', 1135 => '⍑', 1136 => '⍒', 1137 => '⍓', 1138 => '⍔', 1139 => '⍕', 1140 => '⍖', 1141 => '⍗', 1142 => '⍘', 1143 => '⍙', 1144 => '⍚', 1145 => '⍛', 1146 => '⍜', 1147 => '⍝', 1148 => '⍞', 1149 => '⍟', 1150 => '⍠', 1151 => '⍡', 1152 => '⍢', 1153 => '⍣', 1154 => '⍤', 1155 => '⍥', 1156 => '⍦', 1157 => '⍧', 1158 => '⍨', 1159 => '⍩', 1160 => '⍪', 1161 => '⍫', 1162 => '⍬', 1163 => '⍭', 1164 => '⍮', 1165 => '⍯', 1166 => '⍰', 1167 => '⍱', 1168 => '⍲', 1169 => '⍳', 1170 => '⍴', 1171 => '⍵', 1172 => '⍶', 1173 => '⍷', 1174 => '⍸', 1175 => '⍹', 1176 => '⍺', 1177 => '⍻', 1178 => '⍼', 1179 => 
'⍽', 1180 => '⍾', 1181 => '⍿', 1182 => '⎀', 1183 => '⎁', 1184 => '⎂', 1185 => '⎃', 1186 => '⎄', 1187 => '⎅', 1188 => '⎆', 1189 => '⎇', 1190 => '⎈', 1191 => '⎉', 1192 => '⎊', 1193 => '⎋', 1194 => '⎌', 1195 => '⎍', 1196 => '⎎', 1197 => '⎏', 1198 => '⎐', 1199 => '⎑', 1200 => '⎒', 1201 => '⎓', 1202 => '⎔', 1203 => '⎕', 1204 => '⎖', 1205 => '⎗', 1206 => '⎘', 1207 => '⎙', 1208 => '⎚', 1209 => '⎛', 1210 => '⎜', 1211 => '⎝', 1212 => '⎞', 1213 => '⎟', 1214 => '⎠', 1215 => '⎡', 1216 => '⎢', 1217 => '⎣', 1218 => '⎤', 1219 => '⎥', 1220 => '⎦', 1221 => '⎧', 1222 => '⎨', 1223 => '⎩', 1224 => '⎪', 1225 => '⎫', 1226 => '⎬', 1227 => '⎭', 1228 => '⎮', 1229 => '⎯', 1230 => '⎰', 1231 => '⎱', 1232 => '⎲', 1233 => '⎳', 1234 => '⎴', 1235 => '⎵', 1236 => '⎶', 1237 => '⎷', 1238 => '⎸', 1239 => '⎹', 1240 => '⎺', 1241 => '⎻', 1242 => '⎼', 1243 => '⎽', 1244 => '⎾', 1245 => '⎿', 1246 => '⏀', 1247 => '⏁', 1248 => '⏂', 1249 => '⏃', 1250 => '⏄', 1251 => '⏅', 1252 => '⏆', 1253 => '⏇', 1254 => '⏈', 1255 => '⏉', 1256 => '⏊', 1257 => '⏋', 1258 => '⏌', 1259 => '⏍', 1260 => '⏎', 1261 => '⏏', 1262 => '⏐', 1263 => '⏑', 1264 => '⏒', 1265 => '⏓', 1266 => '⏔', 1267 => '⏕', 1268 => '⏖', 1269 => '⏗', 1270 => '⏘', 1271 => '⏙', 1272 => '⏚', 1273 => '⏛', 1274 => '⏜', 1275 => '⏝', 1276 => '⏞', 1277 => '⏟', 1278 => '⏠', 1279 => '⏡', 1280 => '⏢', 1281 => '⏣', 1282 => '⏤', 1283 => '⏥', 1284 => '⏦', 1285 => '⏧', 1286 => '⏨', 1287 => '⏩', 1288 => '⏪', 1289 => '⏫', 1290 => '⏬', 1291 => '⏭', 1292 => '⏮', 1293 => '⏯', 1294 => '⏰', 1295 => '⏱', 1296 => '⏲', 1297 => '⏳', 1298 => '␀', 1299 => '␁', 1300 => '␂', 1301 => '␃', 1302 => '␄', 1303 => '␅', 1304 => '␆', 1305 => '␇', 1306 => '␈', 1307 => '␉', 1308 => '␊', 1309 => '␋', 1310 => '␌', 1311 => '␍', 1312 => '␎', 1313 => '␏', 1314 => '␐', 1315 => '␑', 1316 => '␒', 1317 => '␓', 1318 => '␔', 1319 => '␕', 1320 => '␖', 1321 => '␗', 1322 => '␘', 1323 => '␙', 1324 => '␚', 1325 => '␛', 1326 => '␜', 1327 => '␝', 1328 => '␞', 1329 => '␟', 1330 => '␠', 1331 => '␡', 1332 => '␢', 1333 
=> '␣', 1334 => '␤', 1335 => '␥', 1336 => '␦', 1337 => '⑀', 1338 => '⑁', 1339 => '⑂', 1340 => '⑃', 1341 => '⑄', 1342 => '⑅', 1343 => '⑆', 1344 => '⑇', 1345 => '⑈', 1346 => '⑉', 1347 => '⑊', 1348 => '─', 1349 => '━', 1350 => '│', 1351 => '┃', 1352 => '┄', 1353 => '┅', 1354 => '┆', 1355 => '┇', 1356 => '┈', 1357 => '┉', 1358 => '┊', 1359 => '┋', 1360 => '┌', 1361 => '┍', 1362 => '┎', 1363 => '┏', 1364 => '┐', 1365 => '┑', 1366 => '┒', 1367 => '┓', 1368 => '└', 1369 => '┕', 1370 => '┖', 1371 => '┗', 1372 => '┘', 1373 => '┙', 1374 => '┚', 1375 => '┛', 1376 => '├', 1377 => '┝', 1378 => '┞', 1379 => '┟', 1380 => '┠', 1381 => '┡', 1382 => '┢', 1383 => '┣', 1384 => '┤', 1385 => '┥', 1386 => '┦', 1387 => '┧', 1388 => '┨', 1389 => '┩', 1390 => '┪', 1391 => '┫', 1392 => '┬', 1393 => '┭', 1394 => '┮', 1395 => '┯', 1396 => '┰', 1397 => '┱', 1398 => '┲', 1399 => '┳', 1400 => '┴', 1401 => '┵', 1402 => '┶', 1403 => '┷', 1404 => '┸', 1405 => '┹', 1406 => '┺', 1407 => '┻', 1408 => '┼', 1409 => '┽', 1410 => '┾', 1411 => '┿', 1412 => '╀', 1413 => '╁', 1414 => '╂', 1415 => '╃', 1416 => '╄', 1417 => '╅', 1418 => '╆', 1419 => '╇', 1420 => '╈', 1421 => '╉', 1422 => '╊', 1423 => '╋', 1424 => '╌', 1425 => '╍', 1426 => '╎', 1427 => '╏', 1428 => '═', 1429 => '║', 1430 => '╒', 1431 => '╓', 1432 => '╔', 1433 => '╕', 1434 => '╖', 1435 => '╗', 1436 => '╘', 1437 => '╙', 1438 => '╚', 1439 => '╛', 1440 => '╜', 1441 => '╝', 1442 => '╞', 1443 => '╟', 1444 => '╠', 1445 => '╡', 1446 => '╢', 1447 => '╣', 1448 => '╤', 1449 => '╥', 1450 => '╦', 1451 => '╧', 1452 => '╨', 1453 => '╩', 1454 => '╪', 1455 => '╫', 1456 => '╬', 1457 => '╭', 1458 => '╮', 1459 => '╯', 1460 => '╰', 1461 => '╱', 1462 => '╲', 1463 => '╳', 1464 => '╴', 1465 => '╵', 1466 => '╶', 1467 => '╷', 1468 => '╸', 1469 => '╹', 1470 => '╺', 1471 => '╻', 1472 => '╼', 1473 => '╽', 1474 => '╾', 1475 => '╿', 1476 => '▀', 1477 => '▁', 1478 => '▂', 1479 => '▃', 1480 => '▄', 1481 => '▅', 1482 => '▆', 1483 => '▇', 1484 => '█', 1485 => '▉', 1486 => '▊', 
1487 => '▋', 1488 => '▌', 1489 => '▍', 1490 => '▎', 1491 => '▏', 1492 => '▐', 1493 => '░', 1494 => '▒', 1495 => '▓', 1496 => '▔', 1497 => '▕', 1498 => '▖', 1499 => '▗', 1500 => '▘', 1501 => '▙', 1502 => '▚', 1503 => '▛', 1504 => '▜', 1505 => '▝', 1506 => '▞', 1507 => '▟', 1508 => '■', 1509 => '□', 1510 => '▢', 1511 => '▣', 1512 => '▤', 1513 => '▥', 1514 => '▦', 1515 => '▧', 1516 => '▨', 1517 => '▩', 1518 => '▪', 1519 => '▫', 1520 => '▬', 1521 => '▭', 1522 => '▮', 1523 => '▯', 1524 => '▰', 1525 => '▱', 1526 => '▲', 1527 => '△', 1528 => '▴', 1529 => '▵', 1530 => '▶', 1531 => '▷', 1532 => '▸', 1533 => '▹', 1534 => '►', 1535 => '▻', 1536 => '▼', 1537 => '▽', 1538 => '▾', 1539 => '▿', 1540 => '◀', 1541 => '◁', 1542 => '◂', 1543 => '◃', 1544 => '◄', 1545 => '◅', 1546 => '◆', 1547 => '◇', 1548 => '◈', 1549 => '◉', 1550 => '◊', 1551 => '○', 1552 => '◌', 1553 => '◍', 1554 => '◎', 1555 => '●', 1556 => '◐', 1557 => '◑', 1558 => '◒', 1559 => '◓', 1560 => '◔', 1561 => '◕', 1562 => '◖', 1563 => '◗', 1564 => '◘', 1565 => '◙', 1566 => '◚', 1567 => '◛', 1568 => '◜', 1569 => '◝', 1570 => '◞', 1571 => '◟', 1572 => '◠', 1573 => '◡', 1574 => '◢', 1575 => '◣', 1576 => '◤', 1577 => '◥', 1578 => '◦', 1579 => '◧', 1580 => '◨', 1581 => '◩', 1582 => '◪', 1583 => '◫', 1584 => '◬', 1585 => '◭', 1586 => '◮', 1587 => '◯', 1588 => '◰', 1589 => '◱', 1590 => '◲', 1591 => '◳', 1592 => '◴', 1593 => '◵', 1594 => '◶', 1595 => '◷', 1596 => '◸', 1597 => '◹', 1598 => '◺', 1599 => '◻', 1600 => '◼', 1601 => '◽', 1602 => '◾', 1603 => '◿', 1604 => '☀', 1605 => '☁', 1606 => '☂', 1607 => '☃', 1608 => '☄', 1609 => '★', 1610 => '☆', 1611 => '☇', 1612 => '☈', 1613 => '☉', 1614 => '☊', 1615 => '☋', 1616 => '☌', 1617 => '☍', 1618 => '☎', 1619 => '☏', 1620 => '☐', 1621 => '☑', 1622 => '☒', 1623 => '☓', 1624 => '☔', 1625 => '☕', 1626 => '☖', 1627 => '☗', 1628 => '☘', 1629 => '☙', 1630 => '☚', 1631 => '☛', 1632 => '☜', 1633 => '☝', 1634 => '☞', 1635 => '☟', 1636 => '☠', 1637 => '☡', 1638 => '☢', 1639 => '☣', 1640 => 
'☤', 1641 => '☥', 1642 => '☦', 1643 => '☧', 1644 => '☨', 1645 => '☩', 1646 => '☪', 1647 => '☫', 1648 => '☬', 1649 => '☭', 1650 => '☮', 1651 => '☯', 1652 => '☸', 1653 => '☹', 1654 => '☺', 1655 => '☻', 1656 => '☼', 1657 => '☽', 1658 => '☾', 1659 => '☿', 1660 => '♀', 1661 => '♁', 1662 => '♂', 1663 => '♃', 1664 => '♄', 1665 => '♅', 1666 => '♆', 1667 => '♇', 1668 => '♈', 1669 => '♉', 1670 => '♊', 1671 => '♋', 1672 => '♌', 1673 => '♍', 1674 => '♎', 1675 => '♏', 1676 => '♐', 1677 => '♑', 1678 => '♒', 1679 => '♓', 1680 => '♔', 1681 => '♕', 1682 => '♖', 1683 => '♗', 1684 => '♘', 1685 => '♙', 1686 => '♚', 1687 => '♛', 1688 => '♜', 1689 => '♝', 1690 => '♞', 1691 => '♟', 1692 => '♠', 1693 => '♡', 1694 => '♢', 1695 => '♣', 1696 => '♤', 1697 => '♥', 1698 => '♦', 1699 => '♧', 1700 => '♨', 1701 => '♩', 1702 => '♪', 1703 => '♫', 1704 => '♬', 1705 => '♰', 1706 => '♱', 1707 => '♲', 1708 => '♳', 1709 => '♴', 1710 => '♵', 1711 => '♶', 1712 => '♷', 1713 => '♸', 1714 => '♹', 1715 => '♺', 1716 => '♻', 1717 => '♼', 1718 => '♽', 1719 => '♾', 1720 => '♿', 1721 => '⚀', 1722 => '⚁', 1723 => '⚂', 1724 => '⚃', 1725 => '⚄', 1726 => '⚅', 1727 => '⚆', 1728 => '⚇', 1729 => '⚈', 1730 => '⚉', 1731 => '⚐', 1732 => '⚑', 1733 => '⚒', 1734 => '⚓', 1735 => '⚔', 1736 => '⚕', 1737 => '⚖', 1738 => '⚗', 1739 => '⚘', 1740 => '⚙', 1741 => '⚚', 1742 => '⚛', 1743 => '⚜', 1744 => '⚝', 1745 => '⚞', 1746 => '⚟', 1747 => '⚠', 1748 => '⚡', 1749 => '⚢', 1750 => '⚣', 1751 => '⚤', 1752 => '⚥', 1753 => '⚦', 1754 => '⚧', 1755 => '⚨', 1756 => '⚩', 1757 => '⚪', 1758 => '⚫', 1759 => '⚬', 1760 => '⚭', 1761 => '⚮', 1762 => '⚯', 1763 => '⚰', 1764 => '⚱', 1765 => '⚲', 1766 => '⚳', 1767 => '⚴', 1768 => '⚵', 1769 => '⚶', 1770 => '⚷', 1771 => '⚸', 1772 => '⚹', 1773 => '⚺', 1774 => '⚻', 1775 => '⚼', 1776 => '⚽', 1777 => '⚾', 1778 => '⚿', 1779 => '⛀', 1780 => '⛁', 1781 => '⛂', 1782 => '⛃', 1783 => '⛄', 1784 => '⛅', 1785 => '⛆', 1786 => '⛇', 1787 => '⛈', 1788 => '⛉', 1789 => '⛊', 1790 => '⛋', 1791 => '⛌', 1792 => '⛍', 1793 => '⛎', 1794 
=> '⛏', 1795 => '⛐', 1796 => '⛑', 1797 => '⛒', 1798 => '⛓', 1799 => '⛔', 1800 => '⛕', 1801 => '⛖', 1802 => '⛗', 1803 => '⛘', 1804 => '⛙', 1805 => '⛚', 1806 => '⛛', 1807 => '⛜', 1808 => '⛝', 1809 => '⛞', 1810 => '⛟', 1811 => '⛠', 1812 => '⛡', 1813 => '⛢', 1814 => '⛣', 1815 => '⛤', 1816 => '⛥', 1817 => '⛦', 1818 => '⛧', 1819 => '⛨', 1820 => '⛩', 1821 => '⛪', 1822 => '⛫', 1823 => '⛬', 1824 => '⛭', 1825 => '⛮', 1826 => '⛯', 1827 => '⛰', 1828 => '⛱', 1829 => '⛲', 1830 => '⛳', 1831 => '⛴', 1832 => '⛵', 1833 => '⛶', 1834 => '⛷', 1835 => '⛸', 1836 => '⛹', 1837 => '⛺', 1838 => '⛻', 1839 => '⛼', 1840 => '⛽', 1841 => '⛾', 1842 => '⛿', 1843 => '✁', 1844 => '✂', 1845 => '✃', 1846 => '✄', 1847 => '✅', 1848 => '✆', 1849 => '✇', 1850 => '✈', 1851 => '✉', 1852 => '✊', 1853 => '✋', 1854 => '✌', 1855 => '✍', 1856 => '✎', 1857 => '✏', 1858 => '✐', 1859 => '✑', 1860 => '✒', 1861 => '✓', 1862 => '✔', 1863 => '✕', 1864 => '✖', 1865 => '✗', 1866 => '✘', 1867 => '✙', 1868 => '✚', 1869 => '✛', 1870 => '✜', 1871 => '✝', 1872 => '✞', 1873 => '✟', 1874 => '✠', 1875 => '✡', 1876 => '✢', 1877 => '✣', 1878 => '✤', 1879 => '✥', 1880 => '✦', 1881 => '✧', 1882 => '✨', 1883 => '✩', 1884 => '✪', 1885 => '✫', 1886 => '✬', 1887 => '✭', 1888 => '✮', 1889 => '✯', 1890 => '✰', 1891 => '✱', 1892 => '✲', 1893 => '✳', 1894 => '✴', 1895 => '✵', 1896 => '✶', 1897 => '✷', 1898 => '✸', 1899 => '✹', 1900 => '✺', 1901 => '✻', 1902 => '✼', 1903 => '✽', 1904 => '✾', 1905 => '✿', 1906 => '❀', 1907 => '❁', 1908 => '❂', 1909 => '❃', 1910 => '❄', 1911 => '❅', 1912 => '❆', 1913 => '❇', 1914 => '❈', 1915 => '❉', 1916 => '❊', 1917 => '❋', 1918 => '❌', 1919 => '❍', 1920 => '❎', 1921 => '❏', 1922 => '❐', 1923 => '❑', 1924 => '❒', 1925 => '❓', 1926 => '❔', 1927 => '❕', 1928 => '❖', 1929 => '❗', 1930 => '❘', 1931 => '❙', 1932 => '❚', 1933 => '❛', 1934 => '❜', 1935 => '❝', 1936 => '❞', 1937 => '❟', 1938 => '❠', 1939 => '❡', 1940 => '❢', 1941 => '❣', 1942 => '❤', 1943 => '❥', 1944 => '❦', 1945 => '❧', 1946 => '❨', 1947 => '❩', 
1948 => '❪', 1949 => '❫', 1950 => '❬', 1951 => '❭', 1952 => '❮', 1953 => '❯', 1954 => '❰', 1955 => '❱', 1956 => '❲', 1957 => '❳', 1958 => '❴', 1959 => '❵', 1960 => '➔', 1961 => '➕', 1962 => '➖', 1963 => '➗', 1964 => '➘', 1965 => '➙', 1966 => '➚', 1967 => '➛', 1968 => '➜', 1969 => '➝', 1970 => '➞', 1971 => '➟', 1972 => '➠', 1973 => '➡', 1974 => '➢', 1975 => '➣', 1976 => '➤', 1977 => '➥', 1978 => '➦', 1979 => '➧', 1980 => '➨', 1981 => '➩', 1982 => '➪', 1983 => '➫', 1984 => '➬', 1985 => '➭', 1986 => '➮', 1987 => '➯', 1988 => '➰', 1989 => '➱', 1990 => '➲', 1991 => '➳', 1992 => '➴', 1993 => '➵', 1994 => '➶', 1995 => '➷', 1996 => '➸', 1997 => '➹', 1998 => '➺', 1999 => '➻', 2000 => '➼', 2001 => '➽', 2002 => '➾', 2003 => '➿', 2004 => '⟀', 2005 => '⟁', 2006 => '⟂', 2007 => '⟃', 2008 => '⟄', 2009 => '⟅', 2010 => '⟆', 2011 => '⟇', 2012 => '⟈', 2013 => '⟉', 2014 => '⟊', 2015 => '⟌', 2016 => '⟎', 2017 => '⟏', 2018 => '⟐', 2019 => '⟑', 2020 => '⟒', 2021 => '⟓', 2022 => '⟔', 2023 => '⟕', 2024 => '⟖', 2025 => '⟗', 2026 => '⟘', 2027 => '⟙', 2028 => '⟚', 2029 => '⟛', 2030 => '⟜', 2031 => '⟝', 2032 => '⟞', 2033 => '⟟', 2034 => '⟠', 2035 => '⟡', 2036 => '⟢', 2037 => '⟣', 2038 => '⟤', 2039 => '⟥', 2040 => '⟦', 2041 => '⟧', 2042 => '⟨', 2043 => '⟩', 2044 => '⟪', 2045 => '⟫', 2046 => '⟰', 2047 => '⟱', 2048 => '⟲', 2049 => '⟳', 2050 => '⟴', 2051 => '⟵', 2052 => '⟶', 2053 => '⟷', 2054 => '⟸', 2055 => '⟹', 2056 => '⟺', 2057 => '⟻', 2058 => '⟼', 2059 => '⟽', 2060 => '⟾', 2061 => '⟿', 2062 => '⤀', 2063 => '⤁', 2064 => '⤂', 2065 => '⤃', 2066 => '⤄', 2067 => '⤅', 2068 => '⤆', 2069 => '⤇', 2070 => '⤈', 2071 => '⤉', 2072 => '⤊', 2073 => '⤋', 2074 => '⤌', 2075 => '⤍', 2076 => '⤎', 2077 => '⤏', 2078 => '⤐', 2079 => '⤑', 2080 => '⤒', 2081 => '⤓', 2082 => '⤔', 2083 => '⤕', 2084 => '⤖', 2085 => '⤗', 2086 => '⤘', 2087 => '⤙', 2088 => '⤚', 2089 => '⤛', 2090 => '⤜', 2091 => '⤝', 2092 => '⤞', 2093 => '⤟', 2094 => '⤠', 2095 => '⤡', 2096 => '⤢', 2097 => '⤣', 2098 => '⤤', 2099 => '⤥', 2100 => '⤦', 2101 => 
'⤧', 2102 => '⤨', 2103 => '⤩', 2104 => '⤪', 2105 => '⤫', 2106 => '⤬', 2107 => '⤭', 2108 => '⤮', 2109 => '⤯', 2110 => '⤰', 2111 => '⤱', 2112 => '⤲', 2113 => '⤳', 2114 => '⤴', 2115 => '⤵', 2116 => '⤶', 2117 => '⤷', 2118 => '⤸', 2119 => '⤹', 2120 => '⤺', 2121 => '⤻', 2122 => '⤼', 2123 => '⤽', 2124 => '⤾', 2125 => '⤿', 2126 => '⥀', 2127 => '⥁', 2128 => '⥂', 2129 => '⥃', 2130 => '⥄', 2131 => '⥅', 2132 => '⥆', 2133 => '⥇', 2134 => '⥈', 2135 => '⥉', 2136 => '⥊', 2137 => '⥋', 2138 => '⥌', 2139 => '⥍', 2140 => '⥎', 2141 => '⥏', 2142 => '⥐', 2143 => '⥑', 2144 => '⥒', 2145 => '⥓', 2146 => '⥔', 2147 => '⥕', 2148 => '⥖', 2149 => '⥗', 2150 => '⥘', 2151 => '⥙', 2152 => '⥚', 2153 => '⥛', 2154 => '⥜', 2155 => '⥝', 2156 => '⥞', 2157 => '⥟', 2158 => '⥠', 2159 => '⥡', 2160 => '⥢', 2161 => '⥣', 2162 => '⥤', 2163 => '⥥', 2164 => '⥦', 2165 => '⥧', 2166 => '⥨', 2167 => '⥩', 2168 => '⥪', 2169 => '⥫', 2170 => '⥬', 2171 => '⥭', 2172 => '⥮', 2173 => '⥯', 2174 => '⥰', 2175 => '⥱', 2176 => '⥲', 2177 => '⥳', 2178 => '⥴', 2179 => '⥵', 2180 => '⥶', 2181 => '⥷', 2182 => '⥸', 2183 => '⥹', 2184 => '⥺', 2185 => '⥻', 2186 => '⥼', 2187 => '⥽', 2188 => '⥾', 2189 => '⥿', 2190 => '⦀', 2191 => '⦁', 2192 => '⦂', 2193 => '⦙', 2194 => '⦚', 2195 => '⦛', 2196 => '⦜', 2197 => '⦝', 2198 => '⦞', 2199 => '⦟', 2200 => '⦠', 2201 => '⦡', 2202 => '⦢', 2203 => '⦣', 2204 => '⦤', 2205 => '⦥', 2206 => '⦦', 2207 => '⦧', 2208 => '⦨', 2209 => '⦩', 2210 => '⦪', 2211 => '⦫', 2212 => '⦬', 2213 => '⦭', 2214 => '⦮', 2215 => '⦯', 2216 => '⦰', 2217 => '⦱', 2218 => '⦲', 2219 => '⦳', 2220 => '⦴', 2221 => '⦵', 2222 => '⦶', 2223 => '⦷', 2224 => '⦸', 2225 => '⦹', 2226 => '⦺', 2227 => '⦻', 2228 => '⦼', 2229 => '⦽', 2230 => '⦾', 2231 => '⦿', 2232 => '⧀', 2233 => '⧁', 2234 => '⧂', 2235 => '⧃', 2236 => '⧄', 2237 => '⧅', 2238 => '⧆', 2239 => '⧇', 2240 => '⧈', 2241 => '⧉', 2242 => '⧊', 2243 => '⧋', 2244 => '⧌', 2245 => '⧍', 2246 => '⧎', 2247 => '⧏', 2248 => '⧐', 2249 => '⧑', 2250 => '⧒', 2251 => '⧓', 2252 => '⧔', 2253 => '⧕', 2254 => '⧖', 2255 
=> '⧗', 2256 => '⧘', 2257 => '⧙', 2258 => '⧚', 2259 => '⧛', 2260 => '⧜', 2261 => '⧝', 2262 => '⧞', 2263 => '⧟', 2264 => '⧠', 2265 => '⧡', 2266 => '⧢', 2267 => '⧣', 2268 => '⧤', 2269 => '⧥', 2270 => '⧦', 2271 => '⧧', 2272 => '⧨', 2273 => '⧩', 2274 => '⧪', 2275 => '⧫', 2276 => '⧬', 2277 => '⧭', 2278 => '⧮', 2279 => '⧯', 2280 => '⧰', 2281 => '⧱', 2282 => '⧲', 2283 => '⧳', 2284 => '⧴', 2285 => '⧵', 2286 => '⧶', 2287 => '⧷', 2288 => '⧸', 2289 => '⧹', 2290 => '⧺', 2291 => '⧻', 2292 => '⧾', 2293 => '⧿', 2294 => '⨀', 2295 => '⨁', 2296 => '⨂', 2297 => '⨃', 2298 => '⨄', 2299 => '⨅', 2300 => '⨆', 2301 => '⨇', 2302 => '⨈', 2303 => '⨉', 2304 => '⨊', 2305 => '⨋', 2306 => '⨍', 2307 => '⨎', 2308 => '⨏', 2309 => '⨐', 2310 => '⨑', 2311 => '⨒', 2312 => '⨓', 2313 => '⨔', 2314 => '⨕', 2315 => '⨖', 2316 => '⨗', 2317 => '⨘', 2318 => '⨙', 2319 => '⨚', 2320 => '⨛', 2321 => '⨜', 2322 => '⨝', 2323 => '⨞', 2324 => '⨟', 2325 => '⨠', 2326 => '⨡', 2327 => '⨢', 2328 => '⨣', 2329 => '⨤', 2330 => '⨥', 2331 => '⨦', 2332 => '⨧', 2333 => '⨨', 2334 => '⨩', 2335 => '⨪', 2336 => '⨫', 2337 => '⨬', 2338 => '⨭', 2339 => '⨮', 2340 => '⨯', 2341 => '⨰', 2342 => '⨱', 2343 => '⨲', 2344 => '⨳', 2345 => '⨴', 2346 => '⨵', 2347 => '⨶', 2348 => '⨷', 2349 => '⨸', 2350 => '⨹', 2351 => '⨺', 2352 => '⨻', 2353 => '⨼', 2354 => '⨽', 2355 => '⨾', 2356 => '⨿', 2357 => '⩀', 2358 => '⩁', 2359 => '⩂', 2360 => '⩃', 2361 => '⩄', 2362 => '⩅', 2363 => '⩆', 2364 => '⩇', 2365 => '⩈', 2366 => '⩉', 2367 => '⩊', 2368 => '⩋', 2369 => '⩌', 2370 => '⩍', 2371 => '⩎', 2372 => '⩏', 2373 => '⩐', 2374 => '⩑', 2375 => '⩒', 2376 => '⩓', 2377 => '⩔', 2378 => '⩕', 2379 => '⩖', 2380 => '⩗', 2381 => '⩘', 2382 => '⩙', 2383 => '⩚', 2384 => '⩛', 2385 => '⩜', 2386 => '⩝', 2387 => '⩞', 2388 => '⩟', 2389 => '⩠', 2390 => '⩡', 2391 => '⩢', 2392 => '⩣', 2393 => '⩤', 2394 => '⩥', 2395 => '⩦', 2396 => '⩧', 2397 => '⩨', 2398 => '⩩', 2399 => '⩪', 2400 => '⩫', 2401 => '⩬', 2402 => '⩭', 2403 => '⩮', 2404 => '⩯', 2405 => '⩰', 2406 => '⩱', 2407 => '⩲', 2408 => '⩳', 
2409 => '⩷', 2410 => '⩸', 2411 => '⩹', 2412 => '⩺', 2413 => '⩻', 2414 => '⩼', 2415 => '⩽', 2416 => '⩾', 2417 => '⩿', 2418 => '⪀', 2419 => '⪁', 2420 => '⪂', 2421 => '⪃', 2422 => '⪄', 2423 => '⪅', 2424 => '⪆', 2425 => '⪇', 2426 => '⪈', 2427 => '⪉', 2428 => '⪊', 2429 => '⪋', 2430 => '⪌', 2431 => '⪍', 2432 => '⪎', 2433 => '⪏', 2434 => '⪐', 2435 => '⪑', 2436 => '⪒', 2437 => '⪓', 2438 => '⪔', 2439 => '⪕', 2440 => '⪖', 2441 => '⪗', 2442 => '⪘', 2443 => '⪙', 2444 => '⪚', 2445 => '⪛', 2446 => '⪜', 2447 => '⪝', 2448 => '⪞', 2449 => '⪟', 2450 => '⪠', 2451 => '⪡', 2452 => '⪢', 2453 => '⪣', 2454 => '⪤', 2455 => '⪥', 2456 => '⪦', 2457 => '⪧', 2458 => '⪨', 2459 => '⪩', 2460 => '⪪', 2461 => '⪫', 2462 => '⪬', 2463 => '⪭', 2464 => '⪮', 2465 => '⪯', 2466 => '⪰', 2467 => '⪱', 2468 => '⪲', 2469 => '⪳', 2470 => '⪴', 2471 => '⪵', 2472 => '⪶', 2473 => '⪷', 2474 => '⪸', 2475 => '⪹', 2476 => '⪺', 2477 => '⪻', 2478 => '⪼', 2479 => '⪽', 2480 => '⪾', 2481 => '⪿', 2482 => '⫀', 2483 => '⫁', 2484 => '⫂', 2485 => '⫃', 2486 => '⫄', 2487 => '⫅', 2488 => '⫆', 2489 => '⫇', 2490 => '⫈', 2491 => '⫉', 2492 => '⫊', 2493 => '⫋', 2494 => '⫌', 2495 => '⫍', 2496 => '⫎', 2497 => '⫏', 2498 => '⫐', 2499 => '⫑', 2500 => '⫒', 2501 => '⫓', 2502 => '⫔', 2503 => '⫕', 2504 => '⫖', 2505 => '⫗', 2506 => '⫘', 2507 => '⫙', 2508 => '⫚', 2509 => '⫛', 2510 => '⫝', 2511 => '⫞', 2512 => '⫟', 2513 => '⫠', 2514 => '⫡', 2515 => '⫢', 2516 => '⫣', 2517 => '⫤', 2518 => '⫥', 2519 => '⫦', 2520 => '⫧', 2521 => '⫨', 2522 => '⫩', 2523 => '⫪', 2524 => '⫫', 2525 => '⫬', 2526 => '⫭', 2527 => '⫮', 2528 => '⫯', 2529 => '⫰', 2530 => '⫱', 2531 => '⫲', 2532 => '⫳', 2533 => '⫴', 2534 => '⫵', 2535 => '⫶', 2536 => '⫷', 2537 => '⫸', 2538 => '⫹', 2539 => '⫺', 2540 => '⫻', 2541 => '⫼', 2542 => '⫽', 2543 => '⫾', 2544 => '⫿', 2545 => '⬀', 2546 => '⬁', 2547 => '⬂', 2548 => '⬃', 2549 => '⬄', 2550 => '⬅', 2551 => '⬆', 2552 => '⬇', 2553 => '⬈', 2554 => '⬉', 2555 => '⬊', 2556 => '⬋', 2557 => '⬌', 2558 => '⬍', 2559 => '⬎', 2560 => '⬏', 2561 => '⬐', 2562 => 
'⬑', 2563 => '⬒', 2564 => '⬓', 2565 => '⬔', 2566 => '⬕', 2567 => '⬖', 2568 => '⬗', 2569 => '⬘', 2570 => '⬙', 2571 => '⬚', 2572 => '⬛', 2573 => '⬜', 2574 => '⬝', 2575 => '⬞', 2576 => '⬟', 2577 => '⬠', 2578 => '⬡', 2579 => '⬢', 2580 => '⬣', 2581 => '⬤', 2582 => '⬥', 2583 => '⬦', 2584 => '⬧', 2585 => '⬨', 2586 => '⬩', 2587 => '⬪', 2588 => '⬫', 2589 => '⬬', 2590 => '⬭', 2591 => '⬮', 2592 => '⬯', 2593 => '⬰', 2594 => '⬱', 2595 => '⬲', 2596 => '⬳', 2597 => '⬴', 2598 => '⬵', 2599 => '⬶', 2600 => '⬷', 2601 => '⬸', 2602 => '⬹', 2603 => '⬺', 2604 => '⬻', 2605 => '⬼', 2606 => '⬽', 2607 => '⬾', 2608 => '⬿', 2609 => '⭀', 2610 => '⭁', 2611 => '⭂', 2612 => '⭃', 2613 => '⭄', 2614 => '⭅', 2615 => '⭆', 2616 => '⭇', 2617 => '⭈', 2618 => '⭉', 2619 => '⭊', 2620 => '⭋', 2621 => '⭌', 2622 => '⭐', 2623 => '⭑', 2624 => '⭒', 2625 => '⭓', 2626 => '⭔', 2627 => '⭕', 2628 => '⭖', 2629 => '⭗', 2630 => '⭘', 2631 => '⭙', 2632 => '⳥', 2633 => '⳦', 2634 => '⳧', 2635 => '⳨', 2636 => '⳩', 2637 => '⳪', 2638 => '⠀', 2639 => '⠁', 2640 => '⠂', 2641 => '⠃', 2642 => '⠄', 2643 => '⠅', 2644 => '⠆', 2645 => '⠇', 2646 => '⠈', 2647 => '⠉', 2648 => '⠊', 2649 => '⠋', 2650 => '⠌', 2651 => '⠍', 2652 => '⠎', 2653 => '⠏', 2654 => '⠐', 2655 => '⠑', 2656 => '⠒', 2657 => '⠓', 2658 => '⠔', 2659 => '⠕', 2660 => '⠖', 2661 => '⠗', 2662 => '⠘', 2663 => '⠙', 2664 => '⠚', 2665 => '⠛', 2666 => '⠜', 2667 => '⠝', 2668 => '⠞', 2669 => '⠟', 2670 => '⠠', 2671 => '⠡', 2672 => '⠢', 2673 => '⠣', 2674 => '⠤', 2675 => '⠥', 2676 => '⠦', 2677 => '⠧', 2678 => '⠨', 2679 => '⠩', 2680 => '⠪', 2681 => '⠫', 2682 => '⠬', 2683 => '⠭', 2684 => '⠮', 2685 => '⠯', 2686 => '⠰', 2687 => '⠱', 2688 => '⠲', 2689 => '⠳', 2690 => '⠴', 2691 => '⠵', 2692 => '⠶', 2693 => '⠷', 2694 => '⠸', 2695 => '⠹', 2696 => '⠺', 2697 => '⠻', 2698 => '⠼', 2699 => '⠽', 2700 => '⠾', 2701 => '⠿', 2702 => '⡀', 2703 => '⡁', 2704 => '⡂', 2705 => '⡃', 2706 => '⡄', 2707 => '⡅', 2708 => '⡆', 2709 => '⡇', 2710 => '⡈', 2711 => '⡉', 2712 => '⡊', 2713 => '⡋', 2714 => '⡌', 2715 => '⡍', 2716 
=> '⡎', 2717 => '⡏', 2718 => '⡐', 2719 => '⡑', 2720 => '⡒', 2721 => '⡓', 2722 => '⡔', 2723 => '⡕', 2724 => '⡖', 2725 => '⡗', 2726 => '⡘', 2727 => '⡙', 2728 => '⡚', 2729 => '⡛', 2730 => '⡜', 2731 => '⡝', 2732 => '⡞', 2733 => '⡟', 2734 => '⡠', 2735 => '⡡', 2736 => '⡢', 2737 => '⡣', 2738 => '⡤', 2739 => '⡥', 2740 => '⡦', 2741 => '⡧', 2742 => '⡨', 2743 => '⡩', 2744 => '⡪', 2745 => '⡫', 2746 => '⡬', 2747 => '⡭', 2748 => '⡮', 2749 => '⡯', 2750 => '⡰', 2751 => '⡱', 2752 => '⡲', 2753 => '⡳', 2754 => '⡴', 2755 => '⡵', 2756 => '⡶', 2757 => '⡷', 2758 => '⡸', 2759 => '⡹', 2760 => '⡺', 2761 => '⡻', 2762 => '⡼', 2763 => '⡽', 2764 => '⡾', 2765 => '⡿', 2766 => '⢀', 2767 => '⢁', 2768 => '⢂', 2769 => '⢃', 2770 => '⢄', 2771 => '⢅', 2772 => '⢆', 2773 => '⢇', 2774 => '⢈', 2775 => '⢉', 2776 => '⢊', 2777 => '⢋', 2778 => '⢌', 2779 => '⢍', 2780 => '⢎', 2781 => '⢏', 2782 => '⢐', 2783 => '⢑', 2784 => '⢒', 2785 => '⢓', 2786 => '⢔', 2787 => '⢕', 2788 => '⢖', 2789 => '⢗', 2790 => '⢘', 2791 => '⢙', 2792 => '⢚', 2793 => '⢛', 2794 => '⢜', 2795 => '⢝', 2796 => '⢞', 2797 => '⢟', 2798 => '⢠', 2799 => '⢡', 2800 => '⢢', 2801 => '⢣', 2802 => '⢤', 2803 => '⢥', 2804 => '⢦', 2805 => '⢧', 2806 => '⢨', 2807 => '⢩', 2808 => '⢪', 2809 => '⢫', 2810 => '⢬', 2811 => '⢭', 2812 => '⢮', 2813 => '⢯', 2814 => '⢰', 2815 => '⢱', 2816 => '⢲', 2817 => '⢳', 2818 => '⢴', 2819 => '⢵', 2820 => '⢶', 2821 => '⢷', 2822 => '⢸', 2823 => '⢹', 2824 => '⢺', 2825 => '⢻', 2826 => '⢼', 2827 => '⢽', 2828 => '⢾', 2829 => '⢿', 2830 => '⣀', 2831 => '⣁', 2832 => '⣂', 2833 => '⣃', 2834 => '⣄', 2835 => '⣅', 2836 => '⣆', 2837 => '⣇', 2838 => '⣈', 2839 => '⣉', 2840 => '⣊', 2841 => '⣋', 2842 => '⣌', 2843 => '⣍', 2844 => '⣎', 2845 => '⣏', 2846 => '⣐', 2847 => '⣑', 2848 => '⣒', 2849 => '⣓', 2850 => '⣔', 2851 => '⣕', 2852 => '⣖', 2853 => '⣗', 2854 => '⣘', 2855 => '⣙', 2856 => '⣚', 2857 => '⣛', 2858 => '⣜', 2859 => '⣝', 2860 => '⣞', 2861 => '⣟', 2862 => '⣠', 2863 => '⣡', 2864 => '⣢', 2865 => '⣣', 2866 => '⣤', 2867 => '⣥', 2868 => '⣦', 2869 => '⣧', 
2870 => '⣨', 2871 => '⣩', 2872 => '⣪', 2873 => '⣫', 2874 => '⣬', 2875 => '⣭', 2876 => '⣮', 2877 => '⣯', 2878 => '⣰', 2879 => '⣱', 2880 => '⣲', 2881 => '⣳', 2882 => '⣴', 2883 => '⣵', 2884 => '⣶', 2885 => '⣷', 2886 => '⣸', 2887 => '⣹', 2888 => '⣺', 2889 => '⣻', 2890 => '⣼', 2891 => '⣽', 2892 => '⣾', 2893 => '⣿', 2894 => '⚊', 2895 => '⚋', 2896 => '⚌', 2897 => '⚍', 2898 => '⚎', 2899 => '⚏', 2900 => '☰', 2901 => '☱', 2902 => '☲', 2903 => '☳', 2904 => '☴', 2905 => '☵', 2906 => '☶', 2907 => '☷', 2908 => '䷀', 2909 => '䷁', 2910 => '䷂', 2911 => '䷃', 2912 => '䷄', 2913 => '䷅', 2914 => '䷆', 2915 => '䷇', 2916 => '䷈', 2917 => '䷉', 2918 => '䷊', 2919 => '䷋', 2920 => '䷌', 2921 => '䷍', 2922 => '䷎', 2923 => '䷏', 2924 => '䷐', 2925 => '䷑', 2926 => '䷒', 2927 => '䷓', 2928 => '䷔', 2929 => '䷕', 2930 => '䷖', 2931 => '䷗', 2932 => '䷘', 2933 => '䷙', 2934 => '䷚', 2935 => '䷛', 2936 => '䷜', 2937 => '䷝', 2938 => '䷞', 2939 => '䷟', 2940 => '䷠', 2941 => '䷡', 2942 => '䷢', 2943 => '䷣', 2944 => '䷤', 2945 => '䷥', 2946 => '䷦', 2947 => '䷧', 2948 => '䷨', 2949 => '䷩', 2950 => '䷪', 2951 => '䷫', 2952 => '䷬', 2953 => '䷭', 2954 => '䷮', 2955 => '䷯', 2956 => '䷰', 2957 => '䷱', 2958 => '䷲', 2959 => '䷳', 2960 => '䷴', 2961 => '䷵', 2962 => '䷶', 2963 => '䷷', 2964 => '䷸', 2965 => '䷹', 2966 => '䷺', 2967 => '䷻', 2968 => '䷼', 2969 => '䷽', 2970 => '䷾', 2971 => '䷿', 2972 => '𝌀', 2973 => '𝌁', 2974 => '𝌂', 2975 => '𝌃', 2976 => '𝌄', 2977 => '𝌅', 2978 => '𝌆', 2979 => '𝌇', 2980 => '𝌈', 2981 => '𝌉', 2982 => '𝌊', 2983 => '𝌋', 2984 => '𝌌', 2985 => '𝌍', 2986 => '𝌎', 2987 => '𝌏', 2988 => '𝌐', 2989 => '𝌑', 2990 => '𝌒', 2991 => '𝌓', 2992 => '𝌔', 2993 => '𝌕', 2994 => '𝌖', 2995 => '𝌗', 2996 => '𝌘', 2997 => '𝌙', 2998 => '𝌚', 2999 => '𝌛', 3000 => '𝌜', 3001 => '𝌝', 3002 => '𝌞', 3003 => '𝌟', 3004 => '𝌠', 3005 => '𝌡', 3006 => '𝌢', 3007 => '𝌣', 3008 => '𝌤', 3009 => '𝌥', 3010 => '𝌦', 3011 => '𝌧', 3012 => '𝌨', 3013 => '𝌩', 3014 => '𝌪', 3015 => '𝌫', 3016 => '𝌬', 3017 => '𝌭', 3018 => '𝌮', 3019 => '𝌯', 3020 => '𝌰', 3021 => '𝌱', 3022 => '𝌲', 3023 => 
'𝌳', 3024 => '𝌴', 3025 => '𝌵', 3026 => '𝌶', 3027 => '𝌷', 3028 => '𝌸', 3029 => '𝌹', 3030 => '𝌺', 3031 => '𝌻', 3032 => '𝌼', 3033 => '𝌽', 3034 => '𝌾', 3035 => '𝌿', 3036 => '𝍀', 3037 => '𝍁', 3038 => '𝍂', 3039 => '𝍃', 3040 => '𝍄', 3041 => '𝍅', 3042 => '𝍆', 3043 => '𝍇', 3044 => '𝍈', 3045 => '𝍉', 3046 => '𝍊', 3047 => '𝍋', 3048 => '𝍌', 3049 => '𝍍', 3050 => '𝍎', 3051 => '𝍏', 3052 => '𝍐', 3053 => '𝍑', 3054 => '𝍒', 3055 => '𝍓', 3056 => '𝍔', 3057 => '𝍕', 3058 => '𝍖', 3059 => '꒐', 3060 => '꒑', 3061 => '꒒', 3062 => '꒓', 3063 => '꒔', 3064 => '꒕', 3065 => '꒖', 3066 => '꒗', 3067 => '꒘', 3068 => '꒙', 3069 => '꒚', 3070 => '꒛', 3071 => '꒜', 3072 => '꒝', 3073 => '꒞', 3074 => '꒟', 3075 => '꒠', 3076 => '꒡', 3077 => '꒢', 3078 => '꒣', 3079 => '꒤', 3080 => '꒥', 3081 => '꒦', 3082 => '꒧', 3083 => '꒨', 3084 => '꒩', 3085 => '꒪', 3086 => '꒫', 3087 => '꒬', 3088 => '꒭', 3089 => '꒮', 3090 => '꒯', 3091 => '꒰', 3092 => '꒱', 3093 => '꒲', 3094 => '꒳', 3095 => '꒴', 3096 => '꒵', 3097 => '꒶', 3098 => '꒷', 3099 => '꒸', 3100 => '꒹', 3101 => '꒺', 3102 => '꒻', 3103 => '꒼', 3104 => '꒽', 3105 => '꒾', 3106 => '꒿', 3107 => '꓀', 3108 => '꓁', 3109 => '꓂', 3110 => '꓃', 3111 => '꓄', 3112 => '꓅', 3113 => '꓆', 3114 => '𐄷', 3115 => '𐄸', 3116 => '𐄹', 3117 => '𐄺', 3118 => '𐄻', 3119 => '𐄼', 3120 => '𐄽', 3121 => '𐄾', 3122 => '𐄿', 3123 => '𐅹', 3124 => '𐅺', 3125 => '𐅻', 3126 => '𐅼', 3127 => '𐅽', 3128 => '𐅾', 3129 => '𐅿', 3130 => '𐆀', 3131 => '𐆁', 3132 => '𐆂', 3133 => '𐆃', 3134 => '𐆄', 3135 => '𐆅', 3136 => '𐆆', 3137 => '𐆇', 3138 => '𐆈', 3139 => '𐆉', 3140 => '𐆐', 3141 => '𐆑', 3142 => '𐆒', 3143 => '𐆓', 3144 => '𐆔', 3145 => '𐆕', 3146 => '𐆖', 3147 => '𐆗', 3148 => '𐆘', 3149 => '𐆙', 3150 => '𐆚', 3151 => '𐆛', 3152 => '𐇐', 3153 => '𐇑', 3154 => '𐇒', 3155 => '𐇓', 3156 => '𐇔', 3157 => '𐇕', 3158 => '𐇖', 3159 => '𐇗', 3160 => '𐇘', 3161 => '𐇙', 3162 => '𐇚', 3163 => '𐇛', 3164 => '𐇜', 3165 => '𐇝', 3166 => '𐇞', 3167 => '𐇟', 3168 => '𐇠', 3169 => '𐇡', 3170 => '𐇢', 3171 => '𐇣', 3172 => '𐇤', 3173 => '𐇥', 3174 => '𐇦', 3175 => '𐇧', 3176 => '𐇨', 3177 
=> '𐇩', 3178 => '𐇪', 3179 => '𐇫', 3180 => '𐇬', 3181 => '𐇭', 3182 => '𐇮', 3183 => '𐇯', 3184 => '𐇰', 3185 => '𐇱', 3186 => '𐇲', 3187 => '𐇳', 3188 => '𐇴', 3189 => '𐇵', 3190 => '𐇶', 3191 => '𐇷', 3192 => '𐇸', 3193 => '𐇹', 3194 => '𐇺', 3195 => '𐇻', 3196 => '𐇼', 3197 => '𝀀', 3198 => '𝀁', 3199 => '𝀂', 3200 => '𝀃', 3201 => '𝀄', 3202 => '𝀅', 3203 => '𝀆', 3204 => '𝀇', 3205 => '𝀈', 3206 => '𝀉', 3207 => '𝀊', 3208 => '𝀋', 3209 => '𝀌', 3210 => '𝀍', 3211 => '𝀎', 3212 => '𝀏', 3213 => '𝀐', 3214 => '𝀑', 3215 => '𝀒', 3216 => '𝀓', 3217 => '𝀔', 3218 => '𝀕', 3219 => '𝀖', 3220 => '𝀗', 3221 => '𝀘', 3222 => '𝀙', 3223 => '𝀚', 3224 => '𝀛', 3225 => '𝀜', 3226 => '𝀝', 3227 => '𝀞', 3228 => '𝀟', 3229 => '𝀠', 3230 => '𝀡', 3231 => '𝀢', 3232 => '𝀣', 3233 => '𝀤', 3234 => '𝀥', 3235 => '𝀦', 3236 => '𝀧', 3237 => '𝀨', 3238 => '𝀩', 3239 => '𝀪', 3240 => '𝀫', 3241 => '𝀬', 3242 => '𝀭', 3243 => '𝀮', 3244 => '𝀯', 3245 => '𝀰', 3246 => '𝀱', 3247 => '𝀲', 3248 => '𝀳', 3249 => '𝀴', 3250 => '𝀵', 3251 => '𝀶', 3252 => '𝀷', 3253 => '𝀸', 3254 => '𝀹', 3255 => '𝀺', 3256 => '𝀻', 3257 => '𝀼', 3258 => '𝀽', 3259 => '𝀾', 3260 => '𝀿', 3261 => '𝁀', 3262 => '𝁁', 3263 => '𝁂', 3264 => '𝁃', 3265 => '𝁄', 3266 => '𝁅', 3267 => '𝁆', 3268 => '𝁇', 3269 => '𝁈', 3270 => '𝁉', 3271 => '𝁊', 3272 => '𝁋', 3273 => '𝁌', 3274 => '𝁍', 3275 => '𝁎', 3276 => '𝁏', 3277 => '𝁐', 3278 => '𝁑', 3279 => '𝁒', 3280 => '𝁓', 3281 => '𝁔', 3282 => '𝁕', 3283 => '𝁖', 3284 => '𝁗', 3285 => '𝁘', 3286 => '𝁙', 3287 => '𝁚', 3288 => '𝁛', 3289 => '𝁜', 3290 => '𝁝', 3291 => '𝁞', 3292 => '𝁟', 3293 => '𝁠', 3294 => '𝁡', 3295 => '𝁢', 3296 => '𝁣', 3297 => '𝁤', 3298 => '𝁥', 3299 => '𝁦', 3300 => '𝁧', 3301 => '𝁨', 3302 => '𝁩', 3303 => '𝁪', 3304 => '𝁫', 3305 => '𝁬', 3306 => '𝁭', 3307 => '𝁮', 3308 => '𝁯', 3309 => '𝁰', 3310 => '𝁱', 3311 => '𝁲', 3312 => '𝁳', 3313 => '𝁴', 3314 => '𝁵', 3315 => '𝁶', 3316 => '𝁷', 3317 => '𝁸', 3318 => '𝁹', 3319 => '𝁺', 3320 => '𝁻', 3321 => '𝁼', 3322 => '𝁽', 3323 => '𝁾', 3324 => '𝁿', 3325 => '𝂀', 3326 => '𝂁', 3327 => '𝂂', 3328 => '𝂃', 3329 => '𝂄', 3330 => '𝂅', 
3331 => '𝂆', 3332 => '𝂇', 3333 => '𝂈', 3334 => '𝂉', 3335 => '𝂊', 3336 => '𝂋', 3337 => '𝂌', 3338 => '𝂍', 3339 => '𝂎', 3340 => '𝂏', 3341 => '𝂐', 3342 => '𝂑', 3343 => '𝂒', 3344 => '𝂓', 3345 => '𝂔', 3346 => '𝂕', 3347 => '𝂖', 3348 => '𝂗', 3349 => '𝂘', 3350 => '𝂙', 3351 => '𝂚', 3352 => '𝂛', 3353 => '𝂜', 3354 => '𝂝', 3355 => '𝂞', 3356 => '𝂟', 3357 => '𝂠', 3358 => '𝂡', 3359 => '𝂢', 3360 => '𝂣', 3361 => '𝂤', 3362 => '𝂥', 3363 => '𝂦', 3364 => '𝂧', 3365 => '𝂨', 3366 => '𝂩', 3367 => '𝂪', 3368 => '𝂫', 3369 => '𝂬', 3370 => '𝂭', 3371 => '𝂮', 3372 => '𝂯', 3373 => '𝂰', 3374 => '𝂱', 3375 => '𝂲', 3376 => '𝂳', 3377 => '𝂴', 3378 => '𝂵', 3379 => '𝂶', 3380 => '𝂷', 3381 => '𝂸', 3382 => '𝂹', 3383 => '𝂺', 3384 => '𝂻', 3385 => '𝂼', 3386 => '𝂽', 3387 => '𝂾', 3388 => '𝂿', 3389 => '𝃀', 3390 => '𝃁', 3391 => '𝃂', 3392 => '𝃃', 3393 => '𝃄', 3394 => '𝃅', 3395 => '𝃆', 3396 => '𝃇', 3397 => '𝃈', 3398 => '𝃉', 3399 => '𝃊', 3400 => '𝃋', 3401 => '𝃌', 3402 => '𝃍', 3403 => '𝃎', 3404 => '𝃏', 3405 => '𝃐', 3406 => '𝃑', 3407 => '𝃒', 3408 => '𝃓', 3409 => '𝃔', 3410 => '𝃕', 3411 => '𝃖', 3412 => '𝃗', 3413 => '𝃘', 3414 => '𝃙', 3415 => '𝃚', 3416 => '𝃛', 3417 => '𝃜', 3418 => '𝃝', 3419 => '𝃞', 3420 => '𝃟', 3421 => '𝃠', 3422 => '𝃡', 3423 => '𝃢', 3424 => '𝃣', 3425 => '𝃤', 3426 => '𝃥', 3427 => '𝃦', 3428 => '𝃧', 3429 => '𝃨', 3430 => '𝃩', 3431 => '𝃪', 3432 => '𝃫', 3433 => '𝃬', 3434 => '𝃭', 3435 => '𝃮', 3436 => '𝃯', 3437 => '𝃰', 3438 => '𝃱', 3439 => '𝃲', 3440 => '𝃳', 3441 => '𝃴', 3442 => '𝃵', 3443 => '𝄀', 3444 => '𝄁', 3445 => '𝄂', 3446 => '𝄃', 3447 => '𝄄', 3448 => '𝄅', 3449 => '𝄆', 3450 => '𝄇', 3451 => '𝄈', 3452 => '𝄉', 3453 => '𝄊', 3454 => '𝄋', 3455 => '𝄌', 3456 => '𝄍', 3457 => '𝄎', 3458 => '𝄏', 3459 => '𝄐', 3460 => '𝄑', 3461 => '𝄒', 3462 => '𝄓', 3463 => '𝄔', 3464 => '𝄕', 3465 => '𝄖', 3466 => '𝄗', 3467 => '𝄘', 3468 => '𝄙', 3469 => '𝄚', 3470 => '𝄛', 3471 => '𝄜', 3472 => '𝄝', 3473 => '𝄞', 3474 => '𝄟', 3475 => '𝄠', 3476 => '𝄡', 3477 => '𝄢', 3478 => '𝄣', 3479 => '𝄤', 3480 => '𝄥', 3481 => '𝄦', 3482 => '♭', 3483 => '♮', 3484 => 
'♯', 3485 => '𝄪', 3486 => '𝄫', 3487 => '𝄬', 3488 => '𝄭', 3489 => '𝄮', 3490 => '𝄯', 3491 => '𝄰', 3492 => '𝄱', 3493 => '𝄲', 3494 => '𝄳', 3495 => '𝄴', 3496 => '𝄵', 3497 => '𝄶', 3498 => '𝄷', 3499 => '𝄸', 3500 => '𝄹', 3501 => '𝄩', 3502 => '𝄺', 3503 => '𝄻', 3504 => '𝄼', 3505 => '𝄽', 3506 => '𝄾', 3507 => '𝄿', 3508 => '𝅀', 3509 => '𝅁', 3510 => '𝅂', 3511 => '𝅃', 3512 => '𝅄', 3513 => '𝅅', 3514 => '𝅆', 3515 => '𝅇', 3516 => '𝅈', 3517 => '𝅉', 3518 => '𝅊', 3519 => '𝅋', 3520 => '𝅌', 3521 => '𝅍', 3522 => '𝅎', 3523 => '𝅏', 3524 => '𝅐', 3525 => '𝅑', 3526 => '𝅒', 3527 => '𝅓', 3528 => '𝅔', 3529 => '𝅕', 3530 => '𝅖', 3531 => '𝅗', 3532 => '𝅘', 3533 => '𝅙', 3534 => '𝅚', 3535 => '𝅛', 3536 => '𝅜', 3537 => '𝅝', 3538 => '𝅪', 3539 => '𝅫', 3540 => '𝅬', 3541 => '𝆃', 3542 => '𝆄', 3543 => '𝆌', 3544 => '𝆍', 3545 => '𝆎', 3546 => '𝆏', 3547 => '𝆐', 3548 => '𝆑', 3549 => '𝆒', 3550 => '𝆓', 3551 => '𝆔', 3552 => '𝆕', 3553 => '𝆖', 3554 => '𝆗', 3555 => '𝆘', 3556 => '𝆙', 3557 => '𝆚', 3558 => '𝆛', 3559 => '𝆜', 3560 => '𝆝', 3561 => '𝆞', 3562 => '𝆟', 3563 => '𝆠', 3564 => '𝆡', 3565 => '𝆢', 3566 => '𝆣', 3567 => '𝆤', 3568 => '𝆥', 3569 => '𝆦', 3570 => '𝆧', 3571 => '𝆨', 3572 => '𝆩', 3573 => '𝆮', 3574 => '𝆯', 3575 => '𝆰', 3576 => '𝆱', 3577 => '𝆲', 3578 => '𝆳', 3579 => '𝆴', 3580 => '𝆵', 3581 => '𝆶', 3582 => '𝆷', 3583 => '𝆸', 3584 => '𝆹', 3585 => '𝆺', 3586 => '𝇁', 3587 => '𝇂', 3588 => '𝇃', 3589 => '𝇄', 3590 => '𝇅', 3591 => '𝇆', 3592 => '𝇇', 3593 => '𝇈', 3594 => '𝇉', 3595 => '𝇊', 3596 => '𝇋', 3597 => '𝇌', 3598 => '𝇍', 3599 => '𝇎', 3600 => '𝇏', 3601 => '𝇐', 3602 => '𝇑', 3603 => '𝇒', 3604 => '𝇓', 3605 => '𝇔', 3606 => '𝇕', 3607 => '𝇖', 3608 => '𝇗', 3609 => '𝇘', 3610 => '𝇙', 3611 => '𝇚', 3612 => '𝇛', 3613 => '𝇜', 3614 => '𝇝', 3615 => '𝈀', 3616 => '𝈁', 3617 => '𝈂', 3618 => '𝈃', 3619 => '𝈄', 3620 => '𝈅', 3621 => '𝈆', 3622 => '𝈇', 3623 => '𝈈', 3624 => '𝈉', 3625 => '𝈊', 3626 => '𝈋', 3627 => '𝈌', 3628 => '𝈍', 3629 => '𝈎', 3630 => '𝈏', 3631 => '𝈐', 3632 => '𝈑', 3633 => '𝈒', 3634 => '𝈓', 3635 => '𝈔', 3636 => '𝈕', 3637 => '𝈖', 3638 
=> '𝈗', 3639 => '𝈘', 3640 => '𝈙', 3641 => '𝈚', 3642 => '𝈛', 3643 => '𝈜', 3644 => '𝈝', 3645 => '𝈞', 3646 => '𝈟', 3647 => '𝈠', 3648 => '𝈡', 3649 => '𝈢', 3650 => '𝈣', 3651 => '𝈤', 3652 => '𝈥', 3653 => '𝈦', 3654 => '𝈧', 3655 => '𝈨', 3656 => '𝈩', 3657 => '𝈪', 3658 => '𝈫', 3659 => '𝈬', 3660 => '𝈭', 3661 => '𝈮', 3662 => '𝈯', 3663 => '𝈰', 3664 => '𝈱', 3665 => '𝈲', 3666 => '𝈳', 3667 => '𝈴', 3668 => '𝈵', 3669 => '𝈶', 3670 => '𝈷', 3671 => '𝈸', 3672 => '𝈹', 3673 => '𝈺', 3674 => '𝈻', 3675 => '𝈼', 3676 => '𝈽', 3677 => '𝈾', 3678 => '𝈿', 3679 => '𝉀', 3680 => '𝉁', 3681 => '𝉅', 3682 => '🀀', 3683 => '🀁', 3684 => '🀂', 3685 => '🀃', 3686 => '🀄', 3687 => '🀅', 3688 => '🀆', 3689 => '🀇', 3690 => '🀈', 3691 => '🀉', 3692 => '🀊', 3693 => '🀋', 3694 => '🀌', 3695 => '🀍', 3696 => '🀎', 3697 => '🀏', 3698 => '🀐', 3699 => '🀑', 3700 => '🀒', 3701 => '🀓', 3702 => '🀔', 3703 => '🀕', 3704 => '🀖', 3705 => '🀗', 3706 => '🀘', 3707 => '🀙', 3708 => '🀚', 3709 => '🀛', 3710 => '🀜', 3711 => '🀝', 3712 => '🀞', 3713 => '🀟', 3714 => '🀠', 3715 => '🀡', 3716 => '🀢', 3717 => '🀣', 3718 => '🀤', 3719 => '🀥', 3720 => '🀦', 3721 => '🀧', 3722 => '🀨', 3723 => '🀩', 3724 => '🀪', 3725 => '🀫', 3726 => '🀰', 3727 => '🀱', 3728 => '🀲', 3729 => '🀳', 3730 => '🀴', 3731 => '🀵', 3732 => '🀶', 3733 => '🀷', 3734 => '🀸', 3735 => '🀹', 3736 => '🀺', 3737 => '🀻', 3738 => '🀼', 3739 => '🀽', 3740 => '🀾', 3741 => '🀿', 3742 => '🁀', 3743 => '🁁', 3744 => '🁂', 3745 => '🁃', 3746 => '🁄', 3747 => '🁅', 3748 => '🁆', 3749 => '🁇', 3750 => '🁈', 3751 => '🁉', 3752 => '🁊', 3753 => '🁋', 3754 => '🁌', 3755 => '🁍', 3756 => '🁎', 3757 => '🁏', 3758 => '🁐', 3759 => '🁑', 3760 => '🁒', 3761 => '🁓', 3762 => '🁔', 3763 => '🁕', 3764 => '🁖', 3765 => '🁗', 3766 => '🁘', 3767 => '🁙', 3768 => '🁚', 3769 => '🁛', 3770 => '🁜', 3771 => '🁝', 3772 => '🁞', 3773 => '🁟', 3774 => '🁠', 3775 => '🁡', 3776 => '🁢', 3777 => '🁣', 3778 => '🁤', 3779 => '🁥', 3780 => '🁦', 3781 => '🁧', 3782 => '🁨', 3783 => '🁩', 3784 => '🁪', 3785 => '🁫', 3786 => '🁬', 3787 => '🁭', 3788 => '🁮', 3789 => '🁯', 3790 => '🁰', 3791 => '🁱', 
3792 => '🁲', 3793 => '🁳', 3794 => '🁴', 3795 => '🁵', 3796 => '🁶', 3797 => '🁷', 3798 => '🁸', 3799 => '🁹', 3800 => '🁺', 3801 => '🁻', 3802 => '🁼', 3803 => '🁽', 3804 => '🁾', 3805 => '🁿', 3806 => '🂀', 3807 => '🂁', 3808 => '🂂', 3809 => '🂃', 3810 => '🂄', 3811 => '🂅', 3812 => '🂆', 3813 => '🂇', 3814 => '🂈', 3815 => '🂉', 3816 => '🂊', 3817 => '🂋', 3818 => '🂌', 3819 => '🂍', 3820 => '🂎', 3821 => '🂏', 3822 => '🂐', 3823 => '🂑', 3824 => '🂒', 3825 => '🂓', 3826 => '🂠', 3827 => '🂡', 3828 => '🂢', 3829 => '🂣', 3830 => '🂤', 3831 => '🂥', 3832 => '🂦', 3833 => '🂧', 3834 => '🂨', 3835 => '🂩', 3836 => '🂪', 3837 => '🂫', 3838 => '🂬', 3839 => '🂭', 3840 => '🂮', 3841 => '🂱', 3842 => '🂲', 3843 => '🂳', 3844 => '🂴', 3845 => '🂵', 3846 => '🂶', 3847 => '🂷', 3848 => '🂸', 3849 => '🂹', 3850 => '🂺', 3851 => '🂻', 3852 => '🂼', 3853 => '🂽', 3854 => '🂾', 3855 => '🃁', 3856 => '🃂', 3857 => '🃃', 3858 => '🃄', 3859 => '🃅', 3860 => '🃆', 3861 => '🃇', 3862 => '🃈', 3863 => '🃉', 3864 => '🃊', 3865 => '🃋', 3866 => '🃌', 3867 => '🃍', 3868 => '🃎', 3869 => '🃏', 3870 => '🃑', 3871 => '🃒', 3872 => '🃓', 3873 => '🃔', 3874 => '🃕', 3875 => '🃖', 3876 => '🃗', 3877 => '🃘', 3878 => '🃙', 3879 => '🃚', 3880 => '🃛', 3881 => '🃜', 3882 => '🃝', 3883 => '🃞', 3884 => '🃟', 3885 => '🌀', 3886 => '🌁', 3887 => '🌂', 3888 => '🌃', 3889 => '🌄', 3890 => '🌅', 3891 => '🌆', 3892 => '🌇', 3893 => '🌈', 3894 => '🌉', 3895 => '🌊', 3896 => '🌋', 3897 => '🌌', 3898 => '🌍', 3899 => '🌎', 3900 => '🌏', 3901 => '🌐', 3902 => '🌑', 3903 => '🌒', 3904 => '🌓', 3905 => '🌔', 3906 => '🌕', 3907 => '🌖', 3908 => '🌗', 3909 => '🌘', 3910 => '🌙', 3911 => '🌚', 3912 => '🌛', 3913 => '🌜', 3914 => '🌝', 3915 => '🌞', 3916 => '🌟', 3917 => '🌠', 3918 => '🌰', 3919 => '🌱', 3920 => '🌲', 3921 => '🌳', 3922 => '🌴', 3923 => '🌵', 3924 => '🌷', 3925 => '🌸', 3926 => '🌹', 3927 => '🌺', 3928 => '🌻', 3929 => '🌼', 3930 => '🌽', 3931 => '🌾', 3932 => '🌿', 3933 => '🍀', 3934 => '🍁', 3935 => '🍂', 3936 => '🍃', 3937 => '🍄', 3938 => '🍅', 3939 => '🍆', 3940 => '🍇', 3941 => '🍈', 3942 => '🍉', 3943 => '🍊', 3944 => '🍋', 3945 => 
'🍌', 3946 => '🍍', 3947 => '🍎', 3948 => '🍏', 3949 => '🍐', 3950 => '🍑', 3951 => '🍒', 3952 => '🍓', 3953 => '🍔', 3954 => '🍕', 3955 => '🍖', 3956 => '🍗', 3957 => '🍘', 3958 => '🍙', 3959 => '🍚', 3960 => '🍛', 3961 => '🍜', 3962 => '🍝', 3963 => '🍞', 3964 => '🍟', 3965 => '🍠', 3966 => '🍡', 3967 => '🍢', 3968 => '🍣', 3969 => '🍤', 3970 => '🍥', 3971 => '🍦', 3972 => '🍧', 3973 => '🍨', 3974 => '🍩', 3975 => '🍪', 3976 => '🍫', 3977 => '🍬', 3978 => '🍭', 3979 => '🍮', 3980 => '🍯', 3981 => '🍰', 3982 => '🍱', 3983 => '🍲', 3984 => '🍳', 3985 => '🍴', 3986 => '🍵', 3987 => '🍶', 3988 => '🍷', 3989 => '🍸', 3990 => '🍹', 3991 => '🍺', 3992 => '🍻', 3993 => '🍼', 3994 => '🎀', 3995 => '🎁', 3996 => '🎂', 3997 => '🎃', 3998 => '🎄', 3999 => '🎅', 4000 => '🎆', 4001 => '🎇', 4002 => '🎈', 4003 => '🎉', 4004 => '🎊', 4005 => '🎋', 4006 => '🎌', 4007 => '🎍', 4008 => '🎎', 4009 => '🎏', 4010 => '🎐', 4011 => '🎑', 4012 => '🎒', 4013 => '🎓', 4014 => '🎠', 4015 => '🎡', 4016 => '🎢', 4017 => '🎣', 4018 => '🎤', 4019 => '🎥', 4020 => '🎦', 4021 => '🎧', 4022 => '🎨', 4023 => '🎩', 4024 => '🎪', 4025 => '🎫', 4026 => '🎬', 4027 => '🎭', 4028 => '🎮', 4029 => '🎯', 4030 => '🎰', 4031 => '🎱', 4032 => '🎲', 4033 => '🎳', 4034 => '🎴', 4035 => '🎵', 4036 => '🎶', 4037 => '🎷', 4038 => '🎸', 4039 => '🎹', 4040 => '🎺', 4041 => '🎻', 4042 => '🎼', 4043 => '🎽', 4044 => '🎾', 4045 => '🎿', 4046 => '🏀', 4047 => '🏁', 4048 => '🏂', 4049 => '🏃', 4050 => '🏄', 4051 => '🏆', 4052 => '🏇', 4053 => '🏈', 4054 => '🏉', 4055 => '🏊', 4056 => '🏠', 4057 => '🏡', 4058 => '🏢', 4059 => '🏣', 4060 => '🏤', 4061 => '🏥', 4062 => '🏦', 4063 => '🏧', 4064 => '🏨', 4065 => '🏩', 4066 => '🏪', 4067 => '🏫', 4068 => '🏬', 4069 => '🏭', 4070 => '🏮', 4071 => '🏯', 4072 => '🏰', 4073 => '🐀', 4074 => '🐁', 4075 => '🐂', 4076 => '🐃', 4077 => '🐄', 4078 => '🐅', 4079 => '🐆', 4080 => '🐇', 4081 => '🐈', 4082 => '🐉', 4083 => '🐊', 4084 => '🐋', 4085 => '🐌', 4086 => '🐍', 4087 => '🐎', 4088 => '🐏', 4089 => '🐐', 4090 => '🐑', 4091 => '🐒', 4092 => '🐓', 4093 => '🐔', 4094 => '🐕', 4095 => '🐖', 4096 => '🐗', 4097 => '🐘', 4098 => '🐙', 4099 
=> '🐚', 4100 => '🐛', 4101 => '🐜', 4102 => '🐝', 4103 => '