MediaWiki master
Parser.php
Go to the documentation of this file.
1<?php
24namespace MediaWiki\Parser;
25
26use BadMethodCallException;
30use CoreTagHooks;
31use Exception;
32use File;
33use HtmlArmor;
37use InvalidArgumentException;
38use Language;
39use LanguageCode;
41use LogicException;
42use MapCacheLRU;
43use MediaHandler;
85use Message;
88use ParserOutput;
89use PPFrame;
90use PPNode;
91use Preprocessor;
93use Psr\Log\LoggerInterface;
94use RuntimeException;
96use StringUtils;
97use StripState;
98use TextContent;
99use UnexpectedValueException;
101use Wikimedia\Bcp47Code\Bcp47CodeValue;
102use Wikimedia\IPUtils;
103use Wikimedia\Parsoid\Core\SectionMetadata;
104use Wikimedia\Parsoid\Core\TOCData;
105use Wikimedia\Parsoid\DOM\Comment;
106use Wikimedia\Parsoid\DOM\DocumentFragment;
107use Wikimedia\Parsoid\DOM\Element;
108use Wikimedia\Parsoid\DOM\Node;
109use Wikimedia\Parsoid\Utils\DOMCompat;
110use Wikimedia\Parsoid\Utils\DOMUtils;
111use Wikimedia\ScopedCallback;
112use Xml;
113
154#[\AllowDynamicProperties]
155class Parser {
156
/** Flags for Parser::setFunctionHook */
public const SFH_NO_HASH = 1;
public const SFH_OBJECT_ARGS = 2;

# Constants needed for external link processing

/** Character class of everything that may appear inside an external link URL. */
public const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
/**
 * Address part of an external link: a dotted-decimal number, a bracketed
 * hex/colon literal, or a single URL-class character.
 */
// phpcs:ignore Generic.Files.LineLength
private const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
/**
 * Matches an http(s) URL whose file name ends in gif/png/jpg/jpeg.
 * The pattern uses the "x" modifier, so the line break inside it is ignored.
 */
// phpcs:ignore Generic.Files.LineLength
private const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
	\\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';

/** Regex fragment: a tab, a non-breaking-space entity, or any \p{Zs} character. */
private const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';

/** Alias of Preprocessor::DOM_FOR_INCLUSION. */
public const PTD_FOR_INCLUSION = Preprocessor::DOM_FOR_INCLUSION;

# Allowed values for $this->mOutputType (see setOutputType() for the
# matching 'html' / 'wiki' / 'pre' / 'plain' shortcut keys)
public const OT_HTML = 1;
public const OT_WIKI = 2;
public const OT_PREPROCESS = 3;
public const OT_PLAIN = 4;

/**
 * Strip markers are built as MARKER_PREFIX . <id> . MARKER_SUFFIX. Both ends
 * contain U+007F, which parse() replaces in the input text so that markers
 * cannot be forged by wikitext.
 */
public const MARKER_SUFFIX = "-QINU`\"'\x7f";
public const MARKER_PREFIX = "\x7f'\"`UNIQ-";

/** Placeholder element marking the table-of-contents position. */
public const TOC_PLACEHOLDER = '<meta property="mw:PageProp/toc" />';

/** Regex matching a <meta> element carrying the mw:PageProp/toc property. */
private const TOC_PLACEHOLDER_REGEX = '/<meta\\b[^>]*\\bproperty\\s*=\\s*"mw:PageProp\\/toc"[^>]*>/';
247
# Persistent:
private array $mTagHooks = [];
private array $mFunctionHooks = [];
private array $mFunctionSynonyms = [ 0 => [], 1 => [] ];
private array $mStripList = [];
private array $mVarCache = [];
private array $mImageParams = [];
private array $mImageParamsMagicArray = [];
/** Counter used by insertStripItem() to make each strip marker unique. */
public $mMarkerIndex = 0;

# Initialised by initializeVariables()
private MagicWordArray $mVariables;
private MagicWordArray $mSubstWords;

# Initialised in constructor
private string $mExtLinkBracketedRegex;
private UrlUtils $urlUtils;
private Preprocessor $mPreprocessor;

# Cleared with clearState():
private ParserOutput $mOutput;
private int $mAutonumber = 0;
private StripState $mStripState;
private LinkHolderArray $mLinkHolders;
private int $mLinkID = 0;
private array $mIncludeSizes;
private array $mTplRedirCache;
public array $mHeadings;
private array $mDoubleUnderscores;
private bool $mShowToc;
private bool $mForceTocPosition;
private array $mTplDomCache;
private ?UserIdentity $mUser;

# Temporary
# These are variables reset at least once per parse regardless of $clearState

/** Options of the current parse; assigned through setOptions(). */
public $mOptions;

/** One of the OT_* constants; assigned through setOutputType(). */
private int $mOutputType;
/** Shortcut map of output-type booleans built by setOutputType(). */
public $ot;
private ?int $mRevisionId = null;
private ?string $mRevisionTimestamp = null;
private ?string $mRevisionUser = null;
private ?int $mRevisionSize = null;
/** Byte length of the text being parsed, or false outside of parse(). */
private $mInputSize = false;

private ?RevisionRecord $mRevisionRecordObject = null;

private array $mLangLinkLanguages;

private ?MapCacheLRU $currentRevisionCache = null;

private $mInParse = false;

private SectionProfiler $mProfiler;
/** Lazily created by getLinkRenderer(). */
private ?LinkRenderer $mLinkRenderer = null;

# Services injected through the constructor
private MagicWordFactory $magicWordFactory;
private Language $contLang;
private LanguageConverterFactory $languageConverterFactory;
private ParserFactory $factory;
private SpecialPageFactory $specialPageFactory;
private TitleFormatter $titleFormatter;
private ServiceOptions $svcOptions;
private LinkRendererFactory $linkRendererFactory;
private NamespaceInfo $nsInfo;
private LoggerInterface $logger;
private BadFileLookup $badFileLookup;
private HookContainer $hookContainer;
private HookRunner $hookRunner;
private TidyDriverBase $tidy;
private WANObjectCache $wanCache;
private UserOptionsLookup $userOptionsLookup;
private UserFactory $userFactory;
private HttpRequestFactory $httpRequestFactory;
private TrackingCategories $trackingCategories;
private SignatureValidatorFactory $signatureValidatorFactory;
private UserNameUtils $userNameUtils;

/**
 * Option keys the constructor requires on its ServiceOptions argument.
 *
 * NOTE(review): the list is empty in this copy, yet the class reads
 * PreprocessorCacheThreshold, EnableParserLimitReporting and ShowHostnames
 * from $svcOptions. The entries appear to have been lost; restore them from
 * the canonical file before relying on this constant.
 */
public const CONSTRUCTOR_OPTIONS = [
	// See documentation for the corresponding config options
	// Many of these are only used in (eg) CoreMagicVariables
];
415
/**
 * Constructing parsers directly is not allowed; obtain one from a ParserFactory.
 *
 * Stores the injected services, builds the bracketed external-link regex and
 * the preprocessor, registers the core parser functions and tag hooks,
 * initialises the magic variables and finally fires the ParserFirstCallInit hook.
 *
 * @param ServiceOptions $svcOptions Must provide exactly self::CONSTRUCTOR_OPTIONS
 * @param MagicWordFactory $magicWordFactory
 * @param Language $contLang Content language
 * @param ParserFactory $factory
 * @param UrlUtils $urlUtils
 * @param SpecialPageFactory $spFactory
 * @param LinkRendererFactory $linkRendererFactory
 * @param NamespaceInfo $nsInfo
 * @param LoggerInterface $logger
 * @param BadFileLookup $badFileLookup
 * @param LanguageConverterFactory $languageConverterFactory
 * @param HookContainer $hookContainer
 * @param TidyDriverBase $tidy
 * @param WANObjectCache $wanCache
 * @param UserOptionsLookup $userOptionsLookup
 * @param UserFactory $userFactory
 * @param TitleFormatter $titleFormatter
 * @param HttpRequestFactory $httpRequestFactory
 * @param TrackingCategories $trackingCategories
 * @param SignatureValidatorFactory $signatureValidatorFactory
 * @param UserNameUtils $userNameUtils
 * @throws BadMethodCallException When not constructed through a ParserFactory
 */
public function __construct(
	ServiceOptions $svcOptions,
	MagicWordFactory $magicWordFactory,
	Language $contLang,
	ParserFactory $factory,
	UrlUtils $urlUtils,
	SpecialPageFactory $spFactory,
	LinkRendererFactory $linkRendererFactory,
	NamespaceInfo $nsInfo,
	LoggerInterface $logger,
	BadFileLookup $badFileLookup,
	LanguageConverterFactory $languageConverterFactory,
	HookContainer $hookContainer,
	TidyDriverBase $tidy,
	WANObjectCache $wanCache,
	UserOptionsLookup $userOptionsLookup,
	UserFactory $userFactory,
	TitleFormatter $titleFormatter,
	HttpRequestFactory $httpRequestFactory,
	TrackingCategories $trackingCategories,
	SignatureValidatorFactory $signatureValidatorFactory,
	UserNameUtils $userNameUtils
) {
	// NOTE(review): this guard condition was missing from the copy under review
	// (the throw was unconditional); confirm against the canonical file.
	if ( ParserFactory::$inParserFactory === 0 ) {
		// Direct construction of Parser was deprecated in 1.34 and
		// removed in 1.36; use a ParserFactory instead.
		throw new BadMethodCallException( 'Direct construction of Parser not allowed' );
	}
	$svcOptions->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
	$this->svcOptions = $svcOptions;

	$this->urlUtils = $urlUtils;
	// [protocol + address + URL characters, optional spaces, link text]
	$this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->urlUtils->validProtocols() . ')' .
		self::EXT_LINK_ADDR .
		self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*)\]/Su';

	$this->magicWordFactory = $magicWordFactory;

	$this->contLang = $contLang;

	$this->factory = $factory;
	$this->specialPageFactory = $spFactory;
	$this->linkRendererFactory = $linkRendererFactory;
	$this->nsInfo = $nsInfo;
	$this->logger = $logger;
	$this->badFileLookup = $badFileLookup;

	$this->languageConverterFactory = $languageConverterFactory;

	$this->hookContainer = $hookContainer;
	$this->hookRunner = new HookRunner( $hookContainer );

	$this->tidy = $tidy;

	$this->wanCache = $wanCache;
	$this->mPreprocessor = new Preprocessor_Hash(
		$this,
		$this->wanCache,
		[
			'cacheThreshold' => $svcOptions->get( MainConfigNames::PreprocessorCacheThreshold ),
			'disableLangConversion' => $languageConverterFactory->isConversionDisabled(),
		]
	);

	$this->userOptionsLookup = $userOptionsLookup;
	$this->userFactory = $userFactory;
	$this->titleFormatter = $titleFormatter;
	$this->httpRequestFactory = $httpRequestFactory;
	$this->trackingCategories = $trackingCategories;
	$this->signatureValidatorFactory = $signatureValidatorFactory;
	$this->userNameUtils = $userNameUtils;

	// These steps used to be done in "::firstCallInit()"
	// (if you're chasing a reference from some old code)
	CoreParserFunctions::register(
		$this,
		new ServiceOptions( CoreParserFunctions::REGISTER_OPTIONS, $svcOptions )
	);
	// NOTE(review): the head of this call and its second argument were missing
	// from the copy under review; restored by analogy with the call above.
	CoreTagHooks::register(
		$this,
		new ServiceOptions( CoreTagHooks::REGISTER_OPTIONS, $svcOptions )
	);
	$this->initializeVariables();

	$this->hookRunner->onParserFirstCallInit( $this );
	// Placeholder title until setPage()/startParse() provides a real one;
	// getPage() recognises 'Badtitle/Missing' as "no title set".
	$this->mTitle = Title::makeTitle( NS_SPECIAL, 'Badtitle/Missing' );
}
529
/**
 * Drops every property held by this object, starting with the link holder
 * array, so that nothing keeps referencing the parser after destruction.
 */
public function __destruct() {
	// @phan-suppress-next-line PhanRedundantCondition Typed property not set in constructor, may be uninitialized
	if ( isset( $this->mLinkHolders ) ) {
		// @phan-suppress-next-line PhanTypeObjectUnsetDeclaredProperty
		unset( $this->mLinkHolders );
	}
	// @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach
	foreach ( $this as $property => $unused ) {
		unset( $this->$property );
	}
}
544
/**
 * Makes the clone independent of the original: clears the in-parse flag,
 * de-references selected fields, gives the clone its own preprocessor bound
 * to it, and fires the ParserCloned hook.
 */
public function __clone() {
	$this->mInParse = false;

	// T58226: When you create a reference "to" an object field, that
	// makes the object field itself be a reference too (until the other
	// reference goes out of scope). When cloning, any field that's a
	// reference is copied as a reference in the new object. Both of these
	// are defined PHP5 behaviors, as inconvenient as it is for us when old
	// hooks from PHP4 days are passing fields by reference.
	foreach ( [ 'mStripState', 'mVarCache' ] as $field ) {
		// Take a plain (non-reference) copy of the value, then rebind the
		// field so that it references that fresh copy only.
		$copy = $this->$field;
		$this->$field =& $copy;
		unset( $copy );
	}

	$this->mPreprocessor = clone $this->mPreprocessor;
	$this->mPreprocessor->resetParser( $this );

	$this->hookRunner->onParserCloned( $this );
}
570
/**
 * No-op kept for backwards compatibility; the work formerly done here now
 * happens in the constructor.
 */
public function firstCallInit() {
	/*
	 * This method should be hard-deprecated once remaining calls are
	 * removed; it no longer does anything.
	 */
}
584
/**
 * Resets all per-parse state: a fresh ParserOutput, link holders, strip
 * state and profiler, zeroed counters and emptied caches. Fires the
 * ParserClearState hook once everything has been reset.
 */
public function clearState() {
	$this->resetOutput();
	$this->mAutonumber = 0;
	$this->mLinkHolders = new LinkHolderArray(
		$this,
		$this->getContentLanguageConverter(),
		$this->getHookContainer()
	);
	$this->mLinkID = 0;

	// Revision context
	$this->mRevisionTimestamp = null;
	$this->mRevisionId = null;
	$this->mRevisionUser = null;
	$this->mRevisionSize = null;
	$this->mRevisionRecordObject = null;

	$this->mVarCache = [];
	$this->mUser = null;
	$this->mLangLinkLanguages = [];
	$this->currentRevisionCache = null;

	$this->mStripState = new StripState( $this );

	# Clear these on every parse, T6549
	$this->mTplRedirCache = [];
	$this->mTplDomCache = [];

	$this->mShowToc = true;
	$this->mForceTocPosition = false;

	// Limit counters reported by makeLimitReport()
	$this->mIncludeSizes = [
		'post-expand' => 0,
		'arg' => 0,
	];
	$this->mPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;
	$this->mHeadings = [];
	$this->mDoubleUnderscores = [];
	$this->mExpensiveFunctionCount = 0;

	$this->mProfiler = new SectionProfiler();

	$this->hookRunner->onParserClearState( $this );
}
631
/**
 * Replaces the current ParserOutput with a new one and registers its
 * recordOption() method as the watcher on the current parser options.
 */
public function resetOutput() {
	$this->mOutput = new ParserOutput();
	$this->mOptions->registerWatcher( [ $this->mOutput, 'recordOption' ] );
}
640
/**
 * Parses wikitext in OT_HTML mode and returns the populated ParserOutput.
 *
 * @param string $text Text to parse
 * @param PageReference $page Page the text belongs to
 * @param ParserOptions $options
 * @param bool $linestart Forwarded to the block-level pass
 * @param bool $clearState When true, the parser is locked for this call,
 *   U+007F is replaced in the input, and the flag is forwarded to startParse()
 * @param int|null $revid Revision ID to use for this parse; when given, the
 *   cached revision record/timestamp/user/size are cleared for its duration
 * @return ParserOutput
 */
public function parse(
	$text, PageReference $page, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	if ( $clearState ) {
		// We use U+007F DELETE to construct strip markers, so we have to make
		// sure that this character does not occur in the input text.
		$text = strtr( $text, "\x7f", "?" );
		// Held in a local so the lock lasts until this method returns
		$magicScopeVariable = $this->lock();
	}
	// Strip U+0000 NULL (T159174)
	$text = str_replace( "\000", '', $text );

	$this->startParse( $page, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	$this->mOutput->resetParseStartTime();

	// Remember the revision context so it can be restored before returning
	$oldRevisionId = $this->mRevisionId;
	$oldRevisionRecordObject = $this->mRevisionRecordObject;
	$oldRevisionTimestamp = $this->mRevisionTimestamp;
	$oldRevisionUser = $this->mRevisionUser;
	$oldRevisionSize = $this->mRevisionSize;
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
		$this->mRevisionRecordObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	$text = $this->internalParse( $text );
	$this->hookRunner->onParserAfterParse( $this, $text, $this->mStripState );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	// Set the title text from the language converter unless conversion is
	// disabled or a display title has already been set.
	if ( !$options->getDisableTitleConversion()
		&& !isset( $this->mDoubleUnderscores['nocontentconvert'] )
		&& !isset( $this->mDoubleUnderscores['notitleconvert'] )
		&& $this->mOutput->getDisplayTitle() === false
	) {
		$titleText = $this->getTargetLanguageConverter()->getConvRuleTitle();
		if ( $titleText !== false ) {
			$titleText = Sanitizer::removeSomeTags( $titleText );
		} else {
			[ $nsText, $nsSeparator, $mainText ] = $this->getTargetLanguageConverter()->convertSplitTitle( $page );
			// In the future, those three pieces could be stored separately rather than joined into $titleText,
			// and OutputPage would format them and join them together, to resolve T314399.
			$titleText = self::formatPageTitle( $nsText, $nsSeparator, $mainText );
		}
		$this->mOutput->setTitleText( $titleText );
	}

	# Recording timing info. Must be called before finalizeAdaptiveCacheExpiry() and
	# makeLimitReport(), which make use of the timing info.
	$this->mOutput->recordTimeProfile();

	# Compute runtime adaptive expiry if set
	$this->mOutput->finalizeAdaptiveCacheExpiry();

	# Warn if too many heavyweight parser functions were used
	$expensiveLimit = $options->getExpensiveParserFunctionLimit();
	if ( $this->mExpensiveFunctionCount > $expensiveLimit ) {
		// Pass both the actual count and the configured limit to the warning
		$this->limitationWarn( 'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$expensiveLimit
		);
	}

	# Information on limits, for the benefit of users who try to skirt them
	if ( $this->svcOptions->get( MainConfigNames::EnableParserLimitReporting ) ) {
		$this->makeLimitReport();
	}

	$this->mOutput->setFromParserOptions( $options );

	$this->mOutput->setRawText( $text );

	// Restore the revision context saved above
	$this->mRevisionId = $oldRevisionId;
	$this->mRevisionRecordObject = $oldRevisionRecordObject;
	$this->mRevisionTimestamp = $oldRevisionTimestamp;
	$this->mRevisionUser = $oldRevisionUser;
	$this->mRevisionSize = $oldRevisionSize;
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
754
/**
 * Stores limit-report data on the current ParserOutput: timings, usage
 * versus limit for each parser limit, strip-state figures, the ten slowest
 * profiled functions, and cache metadata. The ParserLimitReportPrepare hook
 * runs after the limit figures and before the profiling/cache entries.
 */
protected function makeLimitReport() {
	$maxIncludeSize = $this->mOptions->getMaxIncludeSize();

	$cpuTime = $this->mOutput->getTimeProfile( 'cpu' );
	if ( $cpuTime !== null ) {
		$this->mOutput->setLimitReportData( 'limitreport-cputime', sprintf( "%.3f", $cpuTime ) );
	}

	$wallTime = $this->mOutput->getTimeProfile( 'wall' );
	$this->mOutput->setLimitReportData( 'limitreport-walltime', sprintf( "%.3f", $wallTime ) );

	// Each entry is [ value used, configured maximum ]
	$usageVersusLimit = [
		'limitreport-ppvisitednodes' =>
			[ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ],
		'limitreport-postexpandincludesize' =>
			[ $this->mIncludeSizes['post-expand'], $maxIncludeSize ],
		'limitreport-templateargumentsize' =>
			[ $this->mIncludeSizes['arg'], $maxIncludeSize ],
		'limitreport-expansiondepth' =>
			[ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ],
		'limitreport-expensivefunctioncount' =>
			[ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ],
	];
	foreach ( $usageVersusLimit as $reportKey => $pair ) {
		$this->mOutput->setLimitReportData( $reportKey, $pair );
	}

	foreach ( $this->mStripState->getLimitReport() as [ $key, $value ] ) {
		$this->mOutput->setLimitReportData( $key, $value );
	}

	$this->hookRunner->onParserLimitReportPrepare( $this, $this->mOutput );

	// Add on template profiling data in human/machine readable way
	$dataByFunc = $this->mProfiler->getFunctionStats();
	// Slowest first
	uasort( $dataByFunc, static fn ( $a, $b ) => $b['real'] <=> $a['real'] );
	$profileReport = [];
	foreach ( array_slice( $dataByFunc, 0, 10 ) as $stat ) {
		$profileReport[] = sprintf(
			"%6.2f%% %8.3f %6d %s",
			$stat['%real'],
			$stat['real'],
			$stat['calls'],
			htmlspecialchars( $stat['name'] )
		);
	}

	$this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );

	// Add other cache related metadata
	if ( $this->svcOptions->get( MainConfigNames::ShowHostnames ) ) {
		$this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
	}
	$this->mOutput->setLimitReportData( 'cachereport-timestamp', $this->mOutput->getCacheTime() );
	$this->mOutput->setLimitReportData( 'cachereport-ttl', $this->mOutput->getCacheExpiry() );
	$this->mOutput->setLimitReportData(
		'cachereport-transientcontent',
		$this->mOutput->hasReducedExpiry()
	);
}
820
/**
 * Runs internalParse() on $text as a non-main parse ($isMain = false).
 *
 * @param string $text
 * @param PPFrame|false $frame Frame forwarded to internalParse()
 * @return string Result of internalParse()
 */
public function recursiveTagParse( $text, $frame = false ) {
	return $this->internalParse( $text, false, $frame );
}
850
/**
 * Like recursiveTagParse(), but additionally passes the result through
 * internalParseHalfParsed() as a non-main parse.
 *
 * @param string $text
 * @param PPFrame|false $frame Frame forwarded to recursiveTagParse()
 * @return string
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );
	return $this->internalParseHalfParsed( $halfParsed, false );
}
875
/**
 * Parses $text the way parse() treats a main document: recursiveTagParse(),
 * then the ParserAfterParse hook, then internalParseHalfParsed() with
 * $isMain = true.
 *
 * @param string $text
 * @return string
 */
public function parseExtensionTagAsTopLevelDoc( $text ) {
	$text = $this->recursiveTagParse( $text );
	$this->hookRunner->onParserAfterParse( $this, $text, $this->mStripState );
	return $this->internalParseHalfParsed( $text, true );
}
901
/**
 * Expands variables in $text in OT_PREPROCESS mode and unstrips the result.
 * Takes the parser lock and always starts a fresh parse state.
 *
 * @param string $text
 * @param PageReference|null $page
 * @param ParserOptions $options
 * @param int|null $revid Revision ID to use for this run, if any
 * @param PPFrame|false $frame Frame forwarded to replaceVariables()
 * @return string
 */
public function preprocess(
	$text,
	?PageReference $page,
	ParserOptions $options,
	$revid = null,
	$frame = false
) {
	// Held in a local so the lock lasts until this method returns
	$magicScopeVariable = $this->lock();
	$this->startParse( $page, $options, self::OT_PREPROCESS, true );
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}
	$this->hookRunner->onParserBeforePreprocess( $this, $text, $this->mStripState );
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
931
/**
 * Expands variables in $text and unstrips the result, without locking the
 * parser or resetting its state.
 *
 * @param string $text
 * @param PPFrame|false $frame Frame forwarded to replaceVariables()
 * @return string
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
946
/**
 * Processes $text in OT_PLAIN mode: substitutes $params through a
 * RawMessage, builds the for-inclusion preprocessor DOM and expands it with
 * argument and template expansion switched off, then unstrips the result.
 *
 * @param string $text
 * @param PageReference $page
 * @param ParserOptions $options
 * @param array $params Message parameters substituted into $text
 * @return string
 */
public function getPreloadText( $text, PageReference $page, ParserOptions $options, $params = [] ) {
	$text = ( new RawMessage( $text ) )->params( $params )->plain();

	# Parser (re)initialisation
	// Held in a local so the lock lasts until this method returns
	$magicScopeVariable = $this->lock();
	$this->startParse( $page, $options, self::OT_PLAIN, true );

	$expandFlags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
	$dom = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
	$expanded = $this->getPreprocessor()->newFrame()->expand( $dom, $expandFlags );
	return $this->mStripState->unstripBoth( $expanded );
}
975
/**
 * Sets the user returned by getUserIdentity(); null makes it fall back to
 * the user from the parser options.
 *
 * @param UserIdentity|null $user
 */
public function setUser( ?UserIdentity $user ) {
	$this->mUser = $user;
}
986
/**
 * Sets the context title by delegating to setPage().
 *
 * The parameter is declared explicitly nullable: implicit nullability via a
 * null default is deprecated as of PHP 8.4, and setPage() already uses the
 * explicit form.
 *
 * @param Title|null $t Title to use, or null for the 'Badtitle/Parser' placeholder
 */
public function setTitle( ?Title $t = null ) {
	$this->setPage( $t );
}
997
/**
 * Returns the context title set by the constructor or by setPage().
 *
 * @return Title
 */
public function getTitle(): Title {
	return $this->mTitle;
}
1006
/**
 * Sets the page used as context for parsing. The reference is converted to
 * a Title and stored without its fragment; null selects the
 * 'Badtitle/Parser' special page as a placeholder.
 *
 * @param PageReference|null $t
 */
public function setPage( ?PageReference $t = null ) {
	// For now (early 1.37 alpha), always convert to Title, so we don't have to do it over
	// and over again in other methods. Eventually, we will no longer need to have a Title
	// instance internally.
	$title = $t
		? Title::newFromPageReference( $t )
		: Title::makeTitle( NS_SPECIAL, 'Badtitle/Parser' );

	# Strip the fragment to avoid various odd effects
	$this->mTitle = $title->hasFragment()
		? $title->createFragmentTarget( '' )
		: $title;
}
1030
/**
 * Returns the current context page, or null (with a deprecation warning)
 * while the title is still the constructor's 'Badtitle/Missing' placeholder.
 *
 * @return PageReference|null
 */
public function getPage(): ?PageReference {
	if ( !$this->mTitle->isSpecial( 'Badtitle' ) ) {
		return $this->mTitle;
	}

	[ , $subPage ] = $this->specialPageFactory->resolveAlias( $this->mTitle->getDBkey() );
	if ( $subPage !== 'Missing' ) {
		return $this->mTitle;
	}

	wfDeprecated( __METHOD__ . ' without a Title set', '1.34' );
	return null;
}
1048
/**
 * Returns the current output type.
 *
 * @return int One of the self::OT_* constants
 */
public function getOutputType(): int {
	return $this->mOutputType;
}
1057
/**
 * Sets the output type and rebuilds the $ot shortcut map, which holds one
 * boolean per output type keyed 'html', 'wiki', 'pre' and 'plain'.
 *
 * @param int $ot One of the self::OT_* constants
 */
public function setOutputType( $ot ): void {
	$this->mOutputType = $ot;

	# Shortcut alias
	$shortcuts = [
		'html' => self::OT_HTML,
		'wiki' => self::OT_WIKI,
		'pre' => self::OT_PREPROCESS,
		'plain' => self::OT_PLAIN,
	];
	$flags = [];
	foreach ( $shortcuts as $key => $type ) {
		// Loose comparison kept on purpose: callers may pass a non-int value
		$flags[$key] = $ot == $type;
	}
	$this->ot = $flags;
}
1073
/**
 * Deprecated combined accessor for the output type, implemented via
 * wfSetVar() on $mOutputType; emits a deprecation warning on every call.
 *
 * @deprecated since 1.35
 * @param int|null $x Value handed to wfSetVar()
 * @return int Whatever wfSetVar() returns
 */
public function OutputType( $x = null ) {
	wfDeprecated( __METHOD__, '1.35' );
	return wfSetVar( $this->mOutputType, $x );
}
1085
/**
 * Returns the ParserOutput currently being built.
 *
 * @return ParserOutput
 */
public function getOutput() {
	return $this->mOutput;
}
1093
/**
 * Returns the options of the current parse, as assigned by setOptions().
 *
 * @return ParserOptions|null
 */
public function getOptions() {
	return $this->mOptions;
}
1101
/**
 * Sets the options used for parsing.
 *
 * @param ParserOptions $options
 */
public function setOptions( ParserOptions $options ): void {
	$this->mOptions = $options;
}
1110
/**
 * Deprecated combined accessor for the parser options, implemented via
 * wfSetVar() on $mOptions; emits a deprecation warning on every call.
 *
 * @deprecated since 1.35
 * @param ParserOptions|null $x Value handed to wfSetVar()
 * @return ParserOptions Whatever wfSetVar() returns
 */
public function Options( $x = null ) {
	wfDeprecated( __METHOD__, '1.35' );
	return wfSetVar( $this->mOptions, $x );
}
1122
/**
 * Returns the current link ID and then advances the counter by one.
 *
 * @return int
 */
public function nextLinkID() {
	return $this->mLinkID++;
}
1130
/**
 * Overwrites the link ID counter used by nextLinkID().
 *
 * @param int $id
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
1138
/**
 * Deprecated alias of getTargetLanguage(); emits a deprecation warning.
 *
 * @deprecated since 1.40, use getTargetLanguage()
 * @return Language
 */
public function getFunctionLang() {
	wfDeprecated( __METHOD__, '1.40' );
	return $this->getTargetLanguage();
}
1149
/**
 * Determines the target language of the parse. Order of precedence:
 * the target language set in the parser options, then the user language
 * when parsing an interface message, then the page language of the title.
 *
 * @return Language
 */
public function getTargetLanguage() {
	$explicitTarget = $this->mOptions->getTargetLanguage();
	if ( $explicitTarget !== null ) {
		return $explicitTarget;
	}

	if ( $this->mOptions->getInterfaceMessage() ) {
		return $this->mOptions->getUserLangObj();
	}

	return $this->getTitle()->getPageLanguage();
}
1169
/**
 * Returns the user set through setUser(), or the user from the parser
 * options when none has been set.
 *
 * @return UserIdentity
 */
public function getUserIdentity(): UserIdentity {
	return $this->mUser ?? $this->getOptions()->getUserIdentity();
}
1180
/**
 * Returns the preprocessor created in the constructor (or cloned by __clone()).
 *
 * @return Preprocessor
 */
public function getPreprocessor() {
	return $this->mPreprocessor;
}
1190
/**
 * Returns the link renderer, creating it from the factory on first use.
 *
 * @return LinkRenderer
 */
public function getLinkRenderer() {
	// XXX We make the LinkRenderer with current options and then cache it forever
	return $this->mLinkRenderer ??= $this->linkRendererFactory->create();
}
1205
/**
 * Returns the MagicWordFactory injected through the constructor.
 *
 * @return MagicWordFactory
 */
public function getMagicWordFactory() {
	return $this->magicWordFactory;
}
1215
/**
 * Returns the content language injected through the constructor.
 *
 * @return Language
 */
public function getContentLanguage() {
	return $this->contLang;
}
1225
/**
 * Returns the BadFileLookup service injected through the constructor.
 *
 * @return BadFileLookup
 */
public function getBadFileLookup() {
	return $this->badFileLookup;
}
1235
/**
 * Replaces each occurrence of the given tags (and every HTML comment) in
 * $text with a unique marker, and reports what was removed.
 *
 * $matches receives one entry per marker:
 *   marker => [ element name, content (null for <tag />), decoded attributes,
 *               text of the element as found (no end tag if none was present) ]
 *
 * A tag without an end tag swallows the rest of the text.
 *
 * @param string[] $elements Tag names, joined unescaped into the regex
 * @param string $text
 * @param array &$matches Output parameter, reset on every call
 * @return string $text with the matched elements replaced by markers
 */
public static function extractTagsAndParams( array $elements, $text, &$matches ) {
	// Shared across calls so markers stay unique
	static $n = 1;
	$stripped = '';
	$matches = [];

	$taglist = implode( '|', $elements );
	$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";

	while ( $text != '' ) {
		$parts = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$partCount = count( $parts );
		$stripped .= $parts[0];
		if ( $partCount < 5 ) {
			// No further opening tag or comment
			break;
		}

		if ( $partCount > 5 ) {
			# comment
			$element = $parts[4];
			$attributes = '';
			$close = '';
			$inside = $parts[5];
		} else {
			# tag
			[ , $element, $attributes, $close, $inside ] = $parts;
		}

		$marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
		$stripped .= $marker;

		if ( $close === '/>' ) {
			# Empty element tag, <tag />
			$content = null;
			$tail = null;
			$text = $inside;
		} else {
			$end = $element === '!--' ? '/(-->)/' : "/(<\\/$element\\s*>)/i";
			$rest = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $rest[0];
			if ( count( $rest ) >= 3 ) {
				[ , $tail, $text ] = $rest;
			} else {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			}
		}

		$matches[$marker] = [
			$element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail",
		];
	}
	return $stripped;
}
1312
/**
 * Returns the parser's strip list.
 *
 * @return array
 */
public function getStripList() {
	return $this->mStripList;
}
1321
/**
 * Returns the strip state of the current parse.
 *
 * @return StripState
 */
public function getStripState() {
	return $this->mStripState;
}
1329
/**
 * Stores $text in the strip state as a "general" item and returns the
 * unique marker that stands in for it.
 *
 * @param string $text
 * @return string The marker
 */
public function insertStripItem( $text ) {
	$marker = self::MARKER_PREFIX . "-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX;
	// Advance the counter so the next marker differs
	$this->mMarkerIndex++;
	$this->mStripState->addGeneral( $marker, $text );
	return $marker;
}
1345
/**
 * Converts wikitext table markup ({| ... |}, |-, |, !, |+) into HTML table
 * elements, processing the text line by line.
 *
 * State is tracked in parallel stacks with one entry per open table, so
 * nested tables work. Tables still open at the end of the text are closed.
 *
 * @param string $text
 * @return string
 */
private function handleTables( $text ) {
	$lines = StringUtils::explode( "\n", $text );
	$out = '';
	$cellOpenStack = []; # Is currently a td tag open?
	$lastTagStack = []; # Save history of last tag activated (td, th or caption)
	$rowOpenStack = []; # Is currently a tr tag open?
	$rowAttrStack = []; # history of tr attributes
	$rowSeenStack = []; # Did this table open a <tr> element?
	$indentLevel = 0; # indent level of the table

	foreach ( $lines as $outLine ) {
		$line = trim( $outLine );

		if ( $line === '' ) { # empty line, go to next line
			$out .= $outLine . "\n";
			continue;
		}

		$firstChar = $line[0];
		$firstTwo = substr( $line, 0, 2 );
		$matches = [];

		if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
			# First check if we are starting a new table
			$indentLevel = strlen( $matches[1] );

			$attributes = $this->mStripState->unstripBoth( $matches[2] );
			$attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );

			$outLine = str_repeat( '<dl><dd>', $indentLevel ) . "<table{$attributes}>";
			$cellOpenStack[] = false;
			$lastTagStack[] = '';
			$rowOpenStack[] = false;
			$rowAttrStack[] = '';
			$rowSeenStack[] = false;
		} elseif ( count( $cellOpenStack ) == 0 ) {
			# Not inside a table: don't do any of the following
			$out .= $outLine . "\n";
			continue;
		} elseif ( $firstTwo === '|}' ) {
			# We are ending a table
			$line = '</table>' . substr( $line, 2 );
			$lastTag = array_pop( $lastTagStack );

			if ( !array_pop( $rowSeenStack ) ) {
				// A table without any row gets one empty row
				$line = "<tr><td></td></tr>{$line}";
			}

			if ( array_pop( $rowOpenStack ) ) {
				$line = "</tr>{$line}";
			}

			if ( array_pop( $cellOpenStack ) ) {
				$line = "</{$lastTag}>{$line}";
			}
			array_pop( $rowAttrStack );
			$outLine = $indentLevel > 0
				? rtrim( $line ) . str_repeat( '</dd></dl>', $indentLevel )
				: $line;
		} elseif ( $firstTwo === '|-' ) {
			# Now we have a table row
			$line = preg_replace( '#^\|-+#', '', $line );

			# Whats after the tag is now only attributes
			$attributes = $this->mStripState->unstripBoth( $line );
			$attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
			// Replace the pending row attributes of the current table
			array_pop( $rowAttrStack );
			$rowAttrStack[] = $attributes;

			$line = '';
			$lastTag = array_pop( $lastTagStack );
			array_pop( $rowSeenStack );
			$rowSeenStack[] = true;

			if ( array_pop( $rowOpenStack ) ) {
				$line = '</tr>';
			}

			if ( array_pop( $cellOpenStack ) ) {
				$line = "</{$lastTag}>{$line}";
			}

			$outLine = $line;
			// The <tr> itself is only emitted once the first cell arrives
			$rowOpenStack[] = false;
			$cellOpenStack[] = false;
			$lastTagStack[] = '';
		} elseif ( $firstChar === '|'
			|| $firstChar === '!'
			|| $firstTwo === '|+'
		) {
			# This might be cell elements, td, th or captions
			if ( $firstTwo === '|+' ) {
				$firstChar = '+';
				$line = substr( $line, 2 );
			} else {
				$line = substr( $line, 1 );
			}

			// Implies both are valid for table headings.
			if ( $firstChar === '!' ) {
				$line = StringUtils::replaceMarkup( '!!', '||', $line );
			}

			# Split up multiple cells on the same line.
			# FIXME : This can result in improper nesting of tags processed
			# by earlier parser steps.
			$cells = explode( '||', $line );

			$outLine = '';

			# Loop through each table cell
			foreach ( $cells as $cell ) {
				$previous = '';
				if ( $firstChar !== '+' ) {
					// Captions live outside rows; everything else needs an open <tr>
					$pendingRowAttrs = array_pop( $rowAttrStack );
					if ( !array_pop( $rowOpenStack ) ) {
						$previous = "<tr{$pendingRowAttrs}>\n";
					}
					$rowOpenStack[] = true;
					$rowAttrStack[] = '';
					array_pop( $rowSeenStack );
					$rowSeenStack[] = true;
				}

				$lastTag = array_pop( $lastTagStack );

				if ( array_pop( $cellOpenStack ) ) {
					$previous = "</{$lastTag}>\n{$previous}";
				}

				if ( $firstChar === '|' ) {
					$lastTag = 'td';
				} elseif ( $firstChar === '!' ) {
					$lastTag = 'th';
				} elseif ( $firstChar === '+' ) {
					$lastTag = 'caption';
				} else {
					$lastTag = '';
				}

				$lastTagStack[] = $lastTag;

				# A cell could contain both parameters and data
				$cellParts = explode( '|', $cell, 2 );

				# T2553: Note that a '|' inside an invalid link should not
				# be mistaken as delimiting cell parameters
				# Bug T153140: Neither should language converter markup.
				if ( preg_match( '/\[\[|-\{/', $cellParts[0] ) === 1 ) {
					$cell = "{$previous}<{$lastTag}>" . trim( $cell );
				} elseif ( count( $cellParts ) == 1 ) {
					// Whitespace in cells is trimmed
					$cell = "{$previous}<{$lastTag}>" . trim( $cellParts[0] );
				} else {
					$attributes = $this->mStripState->unstripBoth( $cellParts[0] );
					$attributes = Sanitizer::fixTagAttributes( $attributes, $lastTag );
					// Whitespace in cells is trimmed
					$cell = "{$previous}<{$lastTag}{$attributes}>" . trim( $cellParts[1] );
				}

				$outLine .= $cell;
				$cellOpenStack[] = true;
			}
		}
		// Any other line inside a table is passed through unchanged
		$out .= $outLine . "\n";
	}

	# Closing open td, tr && table
	while ( count( $cellOpenStack ) > 0 ) {
		if ( array_pop( $cellOpenStack ) ) {
			$out .= "</td>\n";
		}
		if ( array_pop( $rowOpenStack ) ) {
			$out .= "</tr>\n";
		}
		if ( !array_pop( $rowSeenStack ) ) {
			$out .= "<tr><td></td></tr>\n";
		}

		$out .= "</table>\n";
	}

	# Remove trailing line-ending (b/c)
	if ( substr( $out, -1 ) === "\n" ) {
		$out = substr( $out, 0, -1 );
	}

	# special case: don't return empty table
	if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
		$out = '';
	}

	return $out;
}
1548
/**
 * First parsing stage: expands variables/templates, removes disallowed HTML,
 * then converts tables, horizontal rules, double-underscore words, headings,
 * internal links, quotes, external links and magic links, in that order.
 *
 * Returns $text early if a ParserBeforeInternalParse hook handler returns
 * false.
 *
 * @param string $text
 * @param bool $isMain Forwarded to finalizeHeadings()
 * @param PPFrame|false $frame When given, the text is expanded in this frame
 * @return string
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
	$origText = $text;

	# Hook to suspend the parser in this state
	if ( !$this->hookRunner->onParserBeforeInternalParse( $this, $text, $this->mStripState ) ) {
		return $text;
	}

	if ( !$frame ) {
		# if $frame is not provided, then use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	} else {
		# if $frame is provided, then use $frame for replacing any variables.
		# use frame depth to infer how include/noinclude tags should be handled
		# depth=0 means this is the top-level document; otherwise it's an included document
		$domFlags = $frame->depth ? Preprocessor::DOM_FOR_INCLUSION : 0;
		$dom = $this->preprocessToDom( $text, $domFlags );
		$text = $frame->expand( $dom );
	}

	// Callback from the Sanitizer for expanding items found in
	// HTML attribute values, so they can be safely tested and escaped.
	$expandAttribute = function ( &$text, $frame = false ) {
		$text = $this->replaceVariables( $text, $frame );
		$text = $this->mStripState->unstripBoth( $text );
	};
	$text = Sanitizer::internalRemoveHtmlTags( $text, $expandAttribute, false, [], [] );
	$this->hookRunner->onInternalParseBeforeLinks( $this, $text, $this->mStripState );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->handleTables( $text );

	// Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->handleDoubleUnderscore( $text );

	$text = $this->handleHeadings( $text );
	$text = $this->handleInternalLinks( $text );
	$text = $this->handleAllQuotes( $text );
	$text = $this->handleExternalLinks( $text );

	# handleInternalLinks may sometimes leave behind
	# absolute URLs, which have to be masked to hide them from handleExternalLinks
	$text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );

	$text = $this->handleMagicLinks( $text );
	return $this->finalizeHeadings( $text, $origText, $isMain );
}
1624
1632 return $this->languageConverterFactory->getLanguageConverter(
1633 $this->getTargetLanguage()
1634 );
1635 }
1636
/**
 * Fetch the language converter that belongs to the content language.
 *
 * @return ILanguageConverter Converter obtained from the language converter
 *   factory for getContentLanguage()
 */
private function getContentLanguageConverter(): ILanguageConverter {
	$contentLanguage = $this->getContentLanguage();

	return $this->languageConverterFactory->getLanguageConverter( $contentLanguage );
}
1647
/**
 * Accessor for the hook container held by this parser.
 *
 * @return mixed The value of $this->hookContainer
 */
protected function getHookContainer() {
	return $this->hookContainer;
}
1658
/**
 * Accessor for the hook runner held by this parser.
 *
 * @return mixed The value of $this->hookRunner
 */
protected function getHookRunner() {
	return $this->hookRunner;
}
1670
/**
 * Finish off text that has already been through the first parsing stage.
 *
 * Runs block-level formatting, replaces link holders, applies language
 * variant conversion (unless suppressed), localizes the table of contents,
 * records the output language, unstrips the remaining markers and tidies
 * the result.
 *
 * @param string $text Half-parsed text
 * @param bool $isMain When true, the ParserAfterTidy hook is run
 * @param bool $linestart Forwarded to BlockLevelPass::doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	$text = $this->mStripState->unstripGeneral( $text );
	$text = BlockLevelPass::doBlockLevels( $text, $linestart );
	$this->replaceLinkHoldersPrivate( $text );

	// Conversion runs only when it is not disabled through the parser options,
	// the 'nocontentconvert' double underscore, or interface-message mode.
	$conversionWanted = !$this->mOptions->getDisableContentConversion()
		&& !isset( $this->mDoubleUnderscores['nocontentconvert'] )
		&& !$this->mOptions->getInterfaceMessage();

	$converter = null;
	if ( $conversionWanted ) {
		// The position of this convert() call must not change: it assumes
		// that every link has been replaced already and that the <nowiki>
		// marks are the only thing left. The TOC is converted further down.
		$converter = $this->getTargetLanguageConverter();
		$text = $converter->convert( $text );
	}

	// The TOC is handled *after* the main text so that editor-defined
	// conversion rules (by convention placed at the start of the article)
	// are applied to it as well. A null converter suppresses conversion.
	self::localizeTOC(
		$this->mOutput->getTOCData(),
		$this->getTargetLanguage(),
		$converter
	);

	// Record the language of the output: the converter's preferred variant
	// if conversion took place, the plain target language otherwise.
	$outputLanguage = $converter
		? new Bcp47CodeValue( LanguageCode::bcp47( $converter->getPreferredVariant() ) )
		: $this->getTargetLanguage();
	$this->mOutput->setLanguage( $outputLanguage );

	$text = $this->mStripState->unstripNoWiki( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$text = $this->tidy->tidy( $text, [ Sanitizer::class, 'armorFrenchSpaces' ] );

	if ( $isMain ) {
		$this->hookRunner->onParserAfterTidy( $this, $text );
	}

	return $text;
}
1734
/**
 * Replace free external links and RFC / PMID / ISBN "magic links" in the text.
 *
 * Anchors and other HTML elements are matched too, but only so that their
 * contents are skipped; every match is handed to magicLinkCallback().
 *
 * @param string $text
 * @return string
 */
private function handleMagicLinks( $text ) {
	$prots = $this->urlUtils->validAbsoluteProtocols();
	$addr = self::EXT_LINK_ADDR;
	$urlChar = self::EXT_LINK_URL_CLASS;
	// A dash or a single non-newline space
	$spdash = '(?:-|' . self::SPACE_NOT_NL . ')';
	// Possessive match of one or more non-newline spaces
	$spaces = self::SPACE_NOT_NL . '++';

	// The first part is single-quoted so that \t, \r and \n reach PCRE as
	// escapes; the second part interpolates the fragments prepared above.
	$pattern = '!(?:                            # Start cases
			(<a[ \t\r\n>].*?</a>) |         # m[1]: Skip link text
			(<.*?>) |                       # m[2]: Skip stuff inside HTML elements' . "
			(\b                             # m[3]: Free external links
				(?i:$prots)
				($addr$urlChar*)            # m[4]: Post-protocol path
			) |
			\b(?:RFC|PMID) $spaces          # m[5]: RFC or PMID, capture number
				([0-9]+)\b |
			\bISBN $spaces (                # m[6]: ISBN, capture number
				(?: 97[89] $spdash? )?      # optional 13-digit ISBN prefix
				(?: [0-9]  $spdash? ){9}    # 9 digits with opt. delimiters
				[0-9Xx]                     # check digit
			)\b
		)!xu";

	return preg_replace_callback( $pattern, [ $this, 'magicLinkCallback' ], $text );
}
1773
/**
 * Callback for handleMagicLinks(): turn one regex match into its replacement.
 *
 * @param array $m Match array; group numbering as in handleMagicLinks()
 * @return string Replacement for the matched text ($m[0] if left untouched)
 * @throws UnexpectedValueException If group 5 matched but the text starts
 *   with neither "RFC" nor "PMID"
 */
private function magicLinkCallback( array $m ) {
	// A group counts as matched only when it is present and non-empty.
	$matched = static function ( $group ) use ( $m ) {
		return isset( $m[$group] ) && $m[$group] !== '';
	};

	// Anchors and other HTML elements are passed through verbatim.
	if ( $matched( 1 ) || $matched( 2 ) ) {
		return $m[0];
	}

	// Free external link
	if ( $matched( 3 ) ) {
		return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
	}

	// RFC or PMID
	if ( $matched( 5 ) ) {
		$id = $m[5];
		if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
			$enabled = $this->mOptions->getMagicRFCLinks();
			$keyword = 'RFC';
			$urlmsg = 'rfcurl';
			$cssClass = 'mw-magiclink-rfc';
			$trackingCat = 'magiclink-tracking-rfc';
		} elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
			$enabled = $this->mOptions->getMagicPMIDLinks();
			$keyword = 'PMID';
			$urlmsg = 'pubmedurl';
			$cssClass = 'mw-magiclink-pmid';
			$trackingCat = 'magiclink-tracking-pmid';
		} else {
			// Should never happen
			throw new UnexpectedValueException( __METHOD__ . ': unrecognised match type "' .
				substr( $m[0], 0, 20 ) . '"' );
		}
		if ( !$enabled ) {
			// This kind of magic link is switched off: keep the plain text.
			return $m[0];
		}

		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		$this->addTrackingCategory( $trackingCat );
		return Linker::makeExternalLink(
			$url,
			"{$keyword} {$id}",
			true,
			$cssClass,
			[],
			$this->getTitle()
		);
	}

	// ISBN
	if ( $matched( 6 ) && $this->mOptions->getMagicISBNLinks() ) {
		// Normalise every kind of non-newline space to a plain space for display.
		$isbn = preg_replace( '/' . self::SPACE_NOT_NL . '/', ' ', $m[6] );
		// The number handed to Special:Booksources has no separators and an
		// upper-case check digit.
		$num = str_replace( [ '-', ' ', 'x' ], [ '', '', 'X' ], $isbn );
		$this->addTrackingCategory( 'magiclink-tracking-isbn' );
		return $this->getLinkRenderer()->makeKnownLink(
			SpecialPage::getTitleFor( 'Booksources', $num ),
			"ISBN $isbn",
			[
				'class' => 'internal mw-magiclink-isbn',
				'title' => false // suppress title attribute
			]
		);
	}

	return $m[0];
}
1848
/**
 * Build the HTML for a free (bare) external link.
 *
 * Escaped angle brackets / non-breaking spaces and trailing punctuation are
 * split off the URL and emitted after the link as plain text.
 *
 * @param string $url Matched text, starting with the protocol
 * @param int $numPostProto Length of the part of the match after the protocol
 * @return string HTML: link (or external image) followed by the trail, or the
 *   unchanged text if nothing but the protocol would remain
 */
private function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	// '<' and '>' (escaped earlier by internalRemoveHtmlTags()) must not be
	// part of a URL, per RFC 2396. &nbsp; terminates a URL too (bug T84937).
	$entityHit = [];
	$hasEntity = preg_match(
		'/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
		$url,
		$entityHit,
		PREG_OFFSET_CAPTURE
	);
	if ( $hasEntity ) {
		$cutAt = $entityHit[0][1];
		$trail = substr( $url, $cutAt ) . $trail;
		$url = substr( $url, 0, $cutAt );
	}

	// Trailing punctuation goes to the trail. A closing bracket is fair game
	// too, unless the URL contains an opening one.
	$sep = strpos( $url, '(' ) === false ? ',;\.:!?)' : ',;\.:!?';

	$reversed = strrev( $url );
	$trailLen = strspn( $reversed, $sep );
	// Don't break a trailing HTML entity by moving its ';' into the trail.
	// This is hot code: substr_compare() avoids creating a new string for the
	// comparison, and matching on the reversed string from a fixed offset
	// avoids a slow $-anchored preg_match().
	// The un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
	$revHit = [];
	if ( $trailLen
		&& substr_compare( $url, ";", -$trailLen, 1 ) === 0
		&& preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $reversed, $revHit, 0, $trailLen )
	) {
		$trailLen--;
	}
	if ( $trailLen ) {
		$trail = substr( $url, -$trailLen ) . $trail;
		$url = substr( $url, 0, -$trailLen );
	}

	// After trail removal there must be more left than a lone protocol.
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	// Is this an external image?
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html === false ) {
		// Not an image, make a link
		$html = Linker::makeExternalLink(
			$url,
			$this->getTargetLanguageConverter()->markNoConversion( $url ),
			true,
			'free',
			$this->getExternalLinkAttribs( $url ),
			$this->getTitle()
		);
		// Register it in the output object...
		$this->mOutput->addExternalLink( $url );
	}

	return $html . $trail;
}
1927
/**
 * Convert "== heading ==" style lines into <h1>..<h6> elements.
 *
 * Levels are processed from six equals signs down to one, so the longest
 * run of equals signs is matched first.
 *
 * @param string $text
 * @return string
 */
private function handleHeadings( $text ) {
	foreach ( range( 6, 1 ) as $level ) {
		$marker = str_repeat( '=', $level );
		// Only non-newline whitespace is trimmed around the heading text:
		// using \s* would break "==\n===\n", which would parse as <h2>=</h2>.
		$pattern = "/^(?:{$marker})[ \\t]*(.+?)[ \\t]*(?:{$marker})\\s*$/m";
		$text = preg_replace( $pattern, "<h{$level}>\\1</h{$level}>", $text );
	}

	return $text;
}
1943
/**
 * Apply doQuotes() to every line of the text.
 *
 * @param string $text
 * @return string The processed lines, joined by newlines again
 */
private function handleAllQuotes( $text ) {
	$converted = [];
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes( $line );
	}

	return implode( "\n", $converted );
}
1960
/**
 * Convert apostrophe mark-up ('' italics, ''' bold, ''''' both) in a single
 * line of text to <i> and <b> HTML tags.
 *
 * The line is split on runs of two or more apostrophes. Runs of four, or of
 * more than five, give their surplus apostrophes back to the preceding text.
 * If the line then holds an odd number of both bold and italic marks, one
 * bold mark is reinterpreted as an apostrophe followed by italics. Tags that
 * are still open at the end of the line are closed.
 *
 * @param string $text A single line of text
 * @return string The line with apostrophe mark-up replaced by HTML tags
 */
public function doQuotes( $text ) {
	// Odd indexes hold the apostrophe runs, even indexes the text in between.
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr === 1 ) {
		// No mark-up at all.
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (T15227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen === 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen === 2 ) {
			$numitalics++;
		} elseif ( $thislen === 3 ) {
			$numbold++;
		} elseif ( $thislen === 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 === 1 ) && ( $numitalics % 2 === 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) !== 3 ) {
				continue;
			}
			// Last and second-to-last character of the text before the mark.
			$x1 = substr( $arr[$i - 1], -1 );
			$x2 = substr( $arr[$i - 1], -2, 1 );
			if ( $x1 === ' ' ) {
				if ( $firstspace === -1 ) {
					$firstspace = $i;
				}
			} elseif ( $x2 === ' ' ) {
				$firstsingleletterword = $i;
				// if $firstsingleletterword is set, we don't
				// look at the other options, so we can bail early.
				break;
			} elseif ( $firstmultiletterword === -1 ) {
				$firstmultiletterword = $i;
			}
		}

		// Preference order: a single-letter word, then a multi-letter word,
		// then a mark preceded by a space. All three can be -1 (for example
		// when the line holds only one quintuple apostrophe); then nothing
		// is changed.
		$demote = -1;
		if ( $firstsingleletterword > -1 ) {
			$demote = $firstsingleletterword;
		} elseif ( $firstmultiletterword > -1 ) {
			$demote = $firstmultiletterword;
		} elseif ( $firstspace > -1 ) {
			$demote = $firstspace;
		}
		if ( $demote > -1 ) {
			// Turn ''' into a literal apostrophe followed by ''.
			$arr[$demote] = "''";
			$arr[$demote - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state is '', 'i', 'b', 'ib' (italic opened first), 'bi' (bold opened
	// first) or 'both' (a ''''' was seen and the nesting order is not yet
	// known; the text is held back in $buffer until it is).
	$output = '';
	$buffer = '';
	$state = '';
	$i = 0;
	foreach ( $arr as $r ) {
		if ( ( $i % 2 ) === 0 ) {
			// Plain text between two marks.
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			$thislen = strlen( $r );
			if ( $thislen === 2 ) {
				// two quotes - open or close italics
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen === 3 ) {
				// three quotes - open or close bold
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen === 5 ) {
				// five quotes - open or close both separately
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		$i++;
	}

	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against the empty string explicitly: a truthiness test would
	// treat a buffer of exactly "0" as empty and silently drop that text.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
2144
/**
 * Replace bracketed external links ([http://example.org label]) with HTML
 * and register each link target in the parser output.
 *
 * @param string $text
 * @return string
 * @throws RuntimeException If splitting the text with the bracketed-link
 *   regex fails
 */
private function handleExternalLinks( $text ) {
	$bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	// @phan-suppress-next-line PhanTypeComparisonFromArray See phan issue #3161
	if ( $bits === false ) {
		throw new RuntimeException( "PCRE failure" );
	}

	// Everything before the first link is copied through as-is. What remains
	// comes in groups of four: URL, protocol, link text, text after the link.
	$s = array_shift( $bits );
	$total = count( $bits );

	for ( $pos = 0; $pos < $total; $pos += 4 ) {
		$url = $bits[$pos];
		// $bits[$pos + 1] holds the protocol, which is not needed here.
		$label = $bits[$pos + 2];
		$trail = $bits[$pos + 3];

		// '<' and '>' (escaped earlier by internalRemoveHtmlTags()) must not
		// be part of a URL, per RFC 2396: whatever follows moves to the label.
		$entityHit = [];
		if ( preg_match( '/&(lt|gt);/', $url, $entityHit, PREG_OFFSET_CAPTURE ) ) {
			$cutAt = $entityHit[0][1];
			$label = substr( $url, $cutAt ) . ' ' . $label;
			$url = substr( $url, 0, $cutAt );
		}

		// If the link text is an image URL, replace it with an <img> tag.
		// This happened by accident in the original parser, but some people
		// used it extensively.
		$img = $this->maybeMakeExternalImage( $label );
		if ( $img !== false ) {
			$label = $img;
		}

		$dtrail = '';
		// Link type for CSS
		$linktype = 'text';

		if ( $label == '' ) {
			// No link text, e.g. [http://domain.tld/some.link]: autonumber it.
			$number = $this->getTargetLanguage()->formatNum( ++$this->mAutonumber );
			$label = '[' . $number . ']';
			$linktype = 'autonumber';
		} else {
			// Have link text, e.g. [http://domain.tld/some.link text]s
			// Check for trail
			[ $dtrail, $trail ] = Linker::splitTrail( $trail );
		}

		// Excluding protocol-relative URLs may avoid many false positives.
		if ( preg_match( '/^(?:' . $this->urlUtils->validAbsoluteProtocols() . ')/', $label ) ) {
			$label = $this->getTargetLanguageConverter()->markNoConversion( $label );
		}

		$url = Sanitizer::cleanUrl( $url );

		// Use the encoded URL. This means that users can paste URLs directly
		// into the text; funny characters like ö aren't valid in URLs anyway.
		// This was changed in August 2004.
		$link = Linker::makeExternalLink(
			$url,
			$label,
			false,
			$linktype,
			$this->getExternalLinkAttribs( $url ),
			$this->getTitle()
		);
		$s .= $link . $dtrail . $trail;

		// Register link in the output object.
		$this->mOutput->addExternalLink( $url );
	}

	// @phan-suppress-next-line PhanTypeMismatchReturnNullable False positive from array_shift
	return $s;
}
2223
/**
 * Work out the rel attribute value for an external link.
 *
 * 'nofollow' is returned when the NoFollowLinks setting is on, the title's
 * namespace is not listed in NoFollowNsExceptions, and the URL does not
 * match NoFollowDomainExceptions.
 *
 * @param string|false $url URL of the link; passed to wfMatchesDomainList()
 * @param LinkTarget|null $title Page the link appears on. Without a title no
 *   namespace exception can apply.
 * @return string|null 'nofollow', or null if no rel value is needed
 */
public static function getExternalLinkRel( $url = false, ?LinkTarget $title = null ) {
	$mainConfig = MediaWikiServices::getInstance()->getMainConfig();
	$noFollowLinks = $mainConfig->get( MainConfigNames::NoFollowLinks );
	$noFollowNsExceptions = $mainConfig->get( MainConfigNames::NoFollowNsExceptions );
	$noFollowDomainExceptions = $mainConfig->get( MainConfigNames::NoFollowDomainExceptions );

	// Only consult the namespace list when there really is a title. Feeding
	// a placeholder `false` into a loose in_array() would match namespace 0,
	// so a main-namespace exception would wrongly cover title-less links.
	$nsIsExempt = $title !== null
		&& in_array( $title->getNamespace(), $noFollowNsExceptions );

	if ( $noFollowLinks
		&& !$nsIsExempt
		&& !wfMatchesDomainList( $url, $noFollowDomainExceptions )
	) {
		return 'nofollow';
	}
	return null;
}
2247
/**
 * Build the extra HTML attributes (target, rel) for an external link.
 *
 * @param string $url URL of the link, used to decide on rel="nofollow"
 * @return array Attribute name => value; may be empty
 */
public function getExternalLinkAttribs( $url ) {
	$attribs = [];
	$relTokens = [];

	$nofollow = self::getExternalLinkRel( $url, $this->getTitle() ) ?? '';
	if ( $nofollow !== '' ) {
		$relTokens[] = $nofollow;
	}

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
		$staysInBrowsingContext = in_array( $target, [ '_self', '_parent', '_top' ] );
		if ( !$staysInBrowsingContext ) {
			// T133507. New windows can navigate parent cross-origin.
			// noreferrer is included because browser support for noopener
			// is lacking. Eventually noreferrer should be removed.
			$relTokens[] = 'noreferrer noopener';
		}
	}

	if ( $relTokens ) {
		$attribs['rel'] = implode( ' ', $relTokens );
	}
	return $attribs;
}
2281
/**
 * Bring a URL into a normalized form.
 *
 * Unsafe characters are percent-encoded, percent-escapes are unescaped
 * where that is safe for the URL part they appear in (and otherwise
 * upper-cased), dot segments are removed from the path, and the square
 * brackets around an IPv6 host are preserved.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	// Test for RFC 3986 IPv6 syntax
	$scheme = '[a-z][a-z0-9+.-]*:';
	$userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
	$ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';

	// $isIPv6 is either false or the decoded address found between brackets.
	$isIPv6 = false;
	$hostMatch = [];
	if ( preg_match(
		"<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $hostMatch
	) ) {
		$candidate = rawurldecode( $hostMatch[1] );
		if ( IPUtils::isValid( $candidate ) ) {
			$isIPv6 = $candidate;
		}
	}

	// Make sure unsafe characters are encoded
	$url = preg_replace_callback(
		'/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]+/',
		static fn ( $run ) => rawurlencode( $run[0] ),
		$url
	);

	// The URL is consumed from the back: each part found is normalized and
	// prepended to $normalized, and $limit moves to where that part started.
	$normalized = '';
	$limit = strlen( $url );

	// Fragment part - 'fragment'
	$hashPos = strpos( $url, '#' );
	if ( $hashPos !== false && $hashPos < $limit ) {
		$fragment = substr( $url, $hashPos, $limit - $hashPos );
		$normalized = self::normalizeUrlComponent( $fragment, '"#%<>[\]^`{|}' ) . $normalized;
		$limit = $hashPos;
	}

	// Query part - 'query' minus &=+;
	$queryPos = strpos( $url, '?' );
	if ( $queryPos !== false && $queryPos < $limit ) {
		$query = substr( $url, $queryPos, $limit - $queryPos );
		$normalized = self::normalizeUrlComponent( $query, '"#%<>[\]^`{|}&=+;' ) . $normalized;
		$limit = $queryPos;
	}

	// Path part - 'pchar', remove dot segments
	// (find first '/' after the optional '//' after the scheme)
	$authorityPos = strpos( $url, '//' );
	$pathPos = strpos( $url, '/', $authorityPos === false ? 0 : $authorityPos + 2 );
	if ( $pathPos !== false && $pathPos < $limit ) {
		$path = substr( $url, $pathPos, $limit - $pathPos );
		$normalized = UrlUtils::removeDotSegments(
			self::normalizeUrlComponent( $path, '"#%<>[\]^`{|}/?' )
		) . $normalized;
		$limit = $pathPos;
	}

	// Scheme and host part - 'pchar'
	// (we assume no userinfo or encoded colons in the host)
	$normalized = self::normalizeUrlComponent(
		substr( $url, 0, $limit ), '"#%<>[\]^`{|}/?' ) . $normalized;

	// Fix IPv6 syntax: the brackets were percent-encoded above, restore them.
	if ( $isIPv6 !== false ) {
		$encodedHost = "%5B({$isIPv6})%5D";
		$normalized = preg_replace(
			"<^((?:{$scheme})?//(?:{$userinfo}@)?){$encodedHost}(?=[:/?#]|$)>i",
			"$1[$2]",
			$normalized
		);
	}

	return $normalized;
}
2358
/**
 * Normalize the percent-escapes in one component of a URL.
 *
 * An escape is replaced by the character it stands for if that character
 * is printable ASCII (codes 33 to 126) and not listed in $unsafe; every
 * other escape is kept, with its hex digits upper-cased.
 *
 * @param string $component Part of a URL
 * @param string $unsafe Characters that must stay escaped
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		static function ( $hit ) use ( $unsafe ) {
			$decoded = urldecode( $hit[0] );
			$code = ord( $decoded );
			$isPrintableAscii = $code > 32 && $code < 127;

			return ( $isPrintableAscii && strpos( $unsafe, $decoded ) === false )
				// Safe to show literally: unescape it
				? $decoded
				// Leave it escaped, but use uppercase for a-f
				: strtoupper( $hit[0] );
		},
		$component
	);
}
2373
/**
 * Make an external image tag for a URL, if the parser options allow it.
 *
 * @param string $url
 * @return string|false HTML for the image, or false if no image was made
 */
private function maybeMakeExternalImage( $url ) {
	$imagesfrom = $this->mOptions->getAllowExternalImagesFrom();

	// The option is either a single prefix or an array of prefixes; the URL
	// qualifies when it starts with any one of them.
	$prefixMatched = false;
	if ( $imagesfrom ) {
		$prefixes = is_array( $imagesfrom ) ? $imagesfrom : [ $imagesfrom ];
		foreach ( $prefixes as $prefix ) {
			if ( strpos( $url, $prefix ) === 0 ) {
				$prefixMatched = true;
				break;
			}
		}
	}

	$text = false;
	if ( ( $this->mOptions->getAllowExternalImages() || $prefixMatched )
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		// Image found
		$text = Linker::makeExternalImage( $url );
	}

	// The on-wiki whitelist is only consulted when nothing was produced
	// above, the feature is enabled and the URL looks like an image.
	if ( $text
		|| !$this->mOptions->getEnableImageWhitelist()
		|| !preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		return $text;
	}

	$whitelist = explode(
		"\n",
		wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
	);
	foreach ( $whitelist as $entry ) {
		// Ignore comments and blank entries
		if ( strpos( $entry, '#' ) === 0 || $entry === '' ) {
			continue;
		}
		// Each entry is a regex fragment: escape the delimiter and match
		// case-insensitively.
		// @phan-suppress-next-line SecurityCheck-ReDoS preg_quote is not wanted here
		if ( preg_match( '/' . str_replace( '/', '\\/', $entry ) . '/i', $url ) ) {
			// Image matches a whitelist entry
			$text = Linker::makeExternalImage( $url );
			break;
		}
	}

	return $text;
}
2432
/**
 * Process [[ ]] wikilinks in the text.
 *
 * handleInternalLinks2() rewrites $text in place and returns the link
 * holders it created; those are merged into this parser's own collection.
 *
 * @param string $text
 * @return string The text as rewritten by handleInternalLinks2()
 */
private function handleInternalLinks( $text ) {
	$newHolders = $this->handleInternalLinks2( $text );
	$this->mLinkHolders->merge( $newHolders );

	return $text;
}
2444
2450 private function handleInternalLinks2( &$s ) {
2451 static $tc = false, $e1, $e1_img;
2452 # the % is needed to support urlencoded titles as well
2453 if ( !$tc ) {
2454 $tc = Title::legalChars() . '#%';
2455 # Match a link having the form [[namespace:link|alternate]]trail
2456 $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
2457 # Match cases where there is no "]]", which might still be images
2458 $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
2459 }
2460
2461 $holders = new LinkHolderArray(
2462 $this,
2463 $this->getContentLanguageConverter(),
2464 $this->getHookContainer() );
2465
2466 # split the entire text string on occurrences of [[
2467 $a = StringUtils::explode( '[[', ' ' . $s );
2468 # get the first element (all text up to first [[), and remove the space we added
2469 $s = $a->current();
2470 $a->next();
2471 $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
2472 $s = substr( $s, 1 );
2473
2474 $nottalk = !$this->getTitle()->isTalkPage();
2475
2476 $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
2477 $e2 = null;
2478 if ( $useLinkPrefixExtension ) {
2479 # Match the end of a line for a word that's not followed by whitespace,
2480 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
2481 $charset = $this->contLang->linkPrefixCharset();
2482 $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
2483 $m = [];
2484 if ( preg_match( $e2, $s, $m ) ) {
2485 $first_prefix = $m[2];
2486 } else {
2487 $first_prefix = false;
2488 }
2489 $prefix = false;
2490 } else {
2491 $first_prefix = false;
2492 $prefix = '';
2493 }
2494
2495 # Some namespaces don't allow subpages
2496 $useSubpages = $this->nsInfo->hasSubpages(
2497 $this->getTitle()->getNamespace()
2498 );
2499
2500 # Loop for each link
2501 for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
2502 # Check for excessive memory usage
2503 if ( $holders->isBig() ) {
2504 # Too big
2505 # Do the existence check, replace the link holders and clear the array
2506 $holders->replace( $s );
2507 $holders->clear();
2508 }
2509
2510 if ( $useLinkPrefixExtension ) {
2511 // @phan-suppress-next-line PhanTypeMismatchArgumentNullableInternal $e2 is set under this condition
2512 if ( preg_match( $e2, $s, $m ) ) {
2513 [ , $s, $prefix ] = $m;
2514 } else {
2515 $prefix = '';
2516 }
2517 # first link
2518 if ( $first_prefix ) {
2519 $prefix = $first_prefix;
2520 $first_prefix = false;
2521 }
2522 }
2523
2524 $might_be_img = false;
2525
2526 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
2527 $text = $m[2];
2528 # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
2529 # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
2530 # the real problem is with the $e1 regex
2531 # See T1500.
2532 # Still some problems for cases where the ] is meant to be outside punctuation,
2533 # and no image is in sight. See T4095.
2534 if ( $text !== ''
2535 && substr( $m[3], 0, 1 ) === ']'
2536 && strpos( $text, '[' ) !== false
2537 ) {
2538 $text .= ']'; # so that handleExternalLinks($text) works later
2539 $m[3] = substr( $m[3], 1 );
2540 }
2541 # fix up urlencoded title texts
2542 if ( strpos( $m[1], '%' ) !== false ) {
2543 # Should anchors '#' also be rejected?
2544 $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2545 }
2546 $trail = $m[3];
2547 } elseif ( preg_match( $e1_img, $line, $m ) ) {
2548 # Invalid, but might be an image with a link in its caption
2549 $might_be_img = true;
2550 $text = $m[2];
2551 if ( strpos( $m[1], '%' ) !== false ) {
2552 $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2553 }
2554 $trail = "";
2555 } else { # Invalid form; output directly
2556 $s .= $prefix . '[[' . $line;
2557 continue;
2558 }
2559
2560 // @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset preg_match success when reached here
2561 $origLink = ltrim( $m[1], ' ' );
2562
2563 # Don't allow internal links to pages containing
2564 # PROTO: where PROTO is a valid URL protocol; these
2565 # should be external links.
2566 if ( preg_match( '/^(?i:' . $this->urlUtils->validProtocols() . ')/', $origLink ) ) {
2567 $s .= $prefix . '[[' . $line;
2568 continue;
2569 }
2570
2571 # Make subpage if necessary
2572 if ( $useSubpages ) {
2573 $link = Linker::normalizeSubpageLink(
2574 $this->getTitle(), $origLink, $text
2575 );
2576 } else {
2577 $link = $origLink;
2578 }
2579
2580 // \x7f isn't a default legal title char, so most likely strip
2581 // markers will force us into the "invalid form" path above. But,
2582 // just in case, let's assert that xmlish tags aren't valid in
2583 // the title position.
2584 $unstrip = $this->mStripState->killMarkers( $link );
2585 $noMarkers = ( $unstrip === $link );
2586
2587 $nt = $noMarkers ? Title::newFromText( $link ) : null;
2588 if ( $nt === null ) {
2589 $s .= $prefix . '[[' . $line;
2590 continue;
2591 }
2592
2593 $ns = $nt->getNamespace();
2594 $iw = $nt->getInterwiki();
2595
2596 $noforce = ( substr( $origLink, 0, 1 ) !== ':' );
2597
2598 if ( $might_be_img ) { # if this is actually an invalid link
2599 if ( $ns === NS_FILE && $noforce ) { # but might be an image
2600 $found = false;
2601 while ( true ) {
2602 # look at the next 'line' to see if we can close it there
2603 $a->next();
2604 $next_line = $a->current();
2605 if ( $next_line === false || $next_line === null ) {
2606 break;
2607 }
2608 $m = explode( ']]', $next_line, 3 );
2609 if ( count( $m ) == 3 ) {
2610 # the first ]] closes the inner link, the second the image
2611 $found = true;
2612 $text .= "[[{$m[0]}]]{$m[1]}";
2613 $trail = $m[2];
2614 break;
2615 } elseif ( count( $m ) == 2 ) {
2616 # if there's exactly one ]] that's fine, we'll keep looking
2617 $text .= "[[{$m[0]}]]{$m[1]}";
2618 } else {
2619 # if $next_line is invalid too, we need look no further
2620 $text .= '[[' . $next_line;
2621 break;
2622 }
2623 }
2624 if ( !$found ) {
2625 # we couldn't find the end of this imageLink, so output it raw
2626 # but don't ignore what might be perfectly normal links in the text we've examined
2627 $holders->merge( $this->handleInternalLinks2( $text ) );
2628 $s .= "{$prefix}[[$link|$text";
2629 # note: no $trail, because without an end, there *is* no trail
2630 continue;
2631 }
2632 } else { # it's not an image, so output it raw
2633 $s .= "{$prefix}[[$link|$text";
2634 # note: no $trail, because without an end, there *is* no trail
2635 continue;
2636 }
2637 }
2638
2639 $wasblank = ( $text == '' );
2640 if ( $wasblank ) {
2641 $text = $link;
2642 if ( !$noforce ) {
2643 # Strip off leading ':'
2644 $text = substr( $text, 1 );
2645 }
2646 } else {
2647 # T6598 madness. Handle the quotes only if they come from the alternate part
2648 # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
2649 # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
2650 # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
2651 $text = $this->doQuotes( $text );
2652 }
2653
2654 # Link not escaped by : , create the various objects
2655 if ( $noforce && !$nt->wasLocalInterwiki() ) {
2656 # Interwikis
2657 if (
2658 $iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
2659 MediaWikiServices::getInstance()->getLanguageNameUtils()
2660 ->getLanguageName(
2661 $iw,
2662 LanguageNameUtils::AUTONYMS,
2663 LanguageNameUtils::DEFINED
2664 )
2665 || in_array( $iw, $this->svcOptions->get( MainConfigNames::ExtraInterlanguageLinkPrefixes ) )
2666 )
2667 ) {
2668 # T26502: filter duplicates
2669 if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
2670 $this->mLangLinkLanguages[$iw] = true;
2671 $this->mOutput->addLanguageLink( $nt );
2672 }
2673
2677 $s = rtrim( $s . $prefix ) . $trail; # T175416
2678 continue;
2679 }
2680
2681 if ( $ns === NS_FILE ) {
2682 if ( $wasblank ) {
2683 # if no parameters were passed, $text
2684 # becomes something like "File:Foo.png",
2685 # which we don't want to pass on to the
2686 # image generator
2687 $text = '';
2688 } else {
2689 # recursively parse links inside the image caption
2690 # actually, this will parse them in any other parameters, too,
2691 # but it might be hard to fix that, and it doesn't matter ATM
2692 $text = $this->handleExternalLinks( $text );
2693 $holders->merge( $this->handleInternalLinks2( $text ) );
2694 }
2695 # cloak any absolute URLs inside the image markup, so handleExternalLinks() won't touch them
2696 $s .= $prefix . $this->armorLinks(
2697 $this->makeImage( $nt, $text, $holders ) ) . $trail;
2698 continue;
2699 } elseif ( $ns === NS_CATEGORY ) {
2703 $s = rtrim( $s . $prefix ) . $trail; # T2087, T87753
2704
2705 if ( $wasblank ) {
2706 $sortkey = $this->mOutput->getPageProperty( 'defaultsort' ) ?? '';
2707 } else {
2708 $sortkey = $text;
2709 }
2710 $sortkey = Sanitizer::decodeCharReferences( $sortkey );
2711 $sortkey = str_replace( "\n", '', $sortkey );
2712 $sortkey = $this->getTargetLanguageConverter()->convertCategoryKey( $sortkey );
2713 $this->mOutput->addCategory( $nt, $sortkey );
2714
2715 continue;
2716 }
2717 }
2718
2719 # Self-link checking. For some languages, variants of the title are checked in
2720 # LinkHolderArray::doVariants() to allow batching the existence checks necessary
2721 # for linking to a different variant.
2722 if ( $ns !== NS_SPECIAL && $nt->equals( $this->getTitle() ) ) {
2723 $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail, '',
2724 Sanitizer::escapeIdForLink( $nt->getFragment() ) );
2725 continue;
2726 }
2727
2728 # NS_MEDIA is a pseudo-namespace for linking directly to a file
2729 # @todo FIXME: Should do batch file existence checks, see comment below
2730 if ( $ns === NS_MEDIA ) {
2731 # Give extensions a chance to select the file revision for us
2732 $options = [];
2733 $descQuery = false;
2734 $this->hookRunner->onBeforeParserFetchFileAndTitle(
2735 // @phan-suppress-next-line PhanTypeMismatchArgument Type mismatch on pass-by-ref args
2736 $this, $nt, $options, $descQuery
2737 );
2738 # Fetch and register the file (file title may be different via hooks)
2739 [ $file, $nt ] = $this->fetchFileAndTitle( $nt, $options );
2740 # Cloak with NOPARSE to avoid replacement in handleExternalLinks
2741 $s .= $prefix . $this->armorLinks(
2742 Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
2743 continue;
2744 }
2745
2746 # Some titles, such as valid special pages or files in foreign repos, should
2747 # be shown as bluelinks even though they're not included in the page table
2748 # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
2749 # batch file existence checks for NS_FILE and NS_MEDIA
2750 if ( $iw == '' && $nt->isAlwaysKnown() ) {
2751 $this->mOutput->addLink( $nt );
2752 $s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
2753 } else {
2754 # Links will be added to the output link list after checking
2755 $s .= $holders->makeHolder( $nt, $text, $trail, $prefix );
2756 }
2757 }
2758 return $holders;
2759 }
2760
/**
 * Render a known link for a target and cloak URL-like text in the result
 * via armorLinks().
 *
 * @param LinkTarget $nt Target of the link
 * @param string $text Link text; when it compares equal to '', the
 *   HTML-escaped prefixed title text is used instead
 * @param string $trail Text following the link. It is split with
 *   Linker::splitTrail(); the first part is placed inside the link, the
 *   second part is appended after it.
 * @param string $prefix Text placed inside the link, before the link text
 * @return string Armored link HTML followed by the remaining trail
 */
private function makeKnownLinkHolder( LinkTarget $nt, $text = '', $trail = '', $prefix = '' ) {
	$trailParts = Linker::splitTrail( $trail );
	$linkTail = $trailParts[0];
	$afterLink = $trailParts[1];

	// Loose comparison is intentional: anything equal to '' falls back to the title
	$label = $text == ''
		? htmlspecialchars( $this->titleFormatter->getPrefixedText( $nt ) )
		: $text;

	$renderer = $this->getLinkRenderer();
	$html = $renderer->makeKnownLink( $nt, new HtmlArmor( "$prefix$label$linkTail" ) );

	return $this->armorLinks( $html ) . $afterLink;
}
2787
/**
 * Insert a NOPARSE marker in front of every valid URL protocol found at a
 * word boundary in $text (matched case-insensitively).
 *
 * Callers in this class use this to keep handleExternalLinks() from
 * touching URLs inside already-rendered markup.
 *
 * @param string $text
 * @return string Result of the preg_replace() call
 */
private function armorLinks( $text ) {
	$protocolRegex = '/\b((?i)' . $this->urlUtils->validProtocols() . ')/';
	$cloakedProtocol = self::MARKER_PREFIX . "NOPARSE$1";

	return preg_replace( $protocolRegex, $cloakedProtocol, $text );
}
2802
/**
 * Deprecated pass-through to BlockLevelPass::doBlockLevels().
 *
 * @deprecated since 1.35
 * @param string $text Forwarded unchanged
 * @param bool $linestart Forwarded unchanged
 * @return string Whatever BlockLevelPass::doBlockLevels() returns
 */
public function doBlockLevels( $text, $linestart ) {
	wfDeprecated( __METHOD__, '1.35' );

	$processed = BlockLevelPass::doBlockLevels( $text, $linestart );

	return $processed;
}
2816
/**
 * Return the value of a magic variable, computing it on first use and
 * remembering it in $this->mVarCache.
 *
 * Core variables are resolved by CoreMagicVariables::expand(); if that
 * yields null, the ParserGetVariableValueSwitch hook is asked for a value.
 *
 * @param int|string $index Identifier of the magic variable (also the cache key)
 * @param PPFrame|false $frame Passed through to the ParserGetVariableValueSwitch hook
 * @return string|null Cached or freshly computed value (the hook is
 *   expected to supply a string when the core expansion returns null)
 */
private function expandMagicVariable( $index, $frame = false ) {
	$cached = $this->mVarCache[$index] ?? null;
	if ( $cached !== null ) {
		return $cached;
	}

	$timestamp = new MWTimestamp( $this->mOptions->getTimestamp() /* TS_MW */ );
	if ( $this->hookContainer->isRegistered( 'ParserGetVariableValueTs' ) ) {
		// The hook receives the Unix timestamp by reference and may replace it
		$unixTime = $timestamp->getTimestamp( TS_UNIX );
		$this->hookRunner->onParserGetVariableValueTs( $this, $unixTime );
		$timestamp = new MWTimestamp( $unixTime );
	}

	$value = CoreMagicVariables::expand(
		$this,
		$index,
		$timestamp,
		$this->svcOptions,
		$this->logger
	);

	if ( $value === null ) {
		// Not a core magic word: let the hook provide the value. It only gets
		// a throwaway array, never the real mVarCache.
		$scratchCache = [];
		$this->hookRunner->onParserGetVariableValueSwitch(
			// @phan-suppress-next-line PhanTypeMismatchArgument $value is passed as null but returned as string
			$this, $scratchCache, $index, $value, $frame
		);
		// The hook result is cached below like any core value.
		'@phan-var string $value';
	}

	$this->mVarCache[$index] = $value;

	return $value;
}
2862
/**
 * Populate $this->mVariables and $this->mSubstWords from the magic word
 * factory.
 */
private function initializeVariables() {
	$factory = $this->magicWordFactory;

	$this->mVariables = $factory->newArray( $factory->getVariableIDs() );
	$this->mSubstWords = $factory->getSubstArray();
}
2873
/**
 * Run the preprocessor over wikitext and return its object representation.
 *
 * @param string $text Text handed to the preprocessor
 * @param int $flags Flags forwarded to Preprocessor::preprocessToObj()
 *   (e.g. Preprocessor::DOM_FOR_INCLUSION, as used elsewhere in this class)
 * @return PPNode Result of preprocessToObj() — presumably a PPNode; confirm
 *   against the Preprocessor interface
 */
public function preprocessToDom( $text, $flags = 0 ) {
	$preprocessor = $this->getPreprocessor();

	return $preprocessor->preprocessToObj( $text, $flags );
}
2895
/**
 * Preprocess $text and expand it in a frame.
 *
 * Empty text, and text longer than the configured maximum include size,
 * is returned untouched.
 *
 * @param string $text
 * @param PPFrame|array|false $frame Frame to expand in. False creates a
 *   fresh frame; any other non-PPFrame value is wrapped in a custom frame.
 * @param bool $argsOnly When true, expansion uses PPFrame::NO_TEMPLATES
 * @return string
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	# Nothing to do for empty text
	$byteLength = strlen( $text );
	if ( $byteLength < 1 ) {
		return $text;
	}
	# Prevent too big inclusions
	if ( $byteLength > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	if ( $frame === false ) {
		$activeFrame = $this->getPreprocessor()->newFrame();
	} elseif ( $frame instanceof PPFrame ) {
		$activeFrame = $frame;
	} else {
		$this->logger->debug(
			__METHOD__ . " called using plain parameters instead of " .
			"a PPFrame instance. Creating custom frame."
		);
		$activeFrame = $this->getPreprocessor()->newCustomFrame( $frame );
	}

	$dom = $this->preprocessToDom( $text );

	return $activeFrame->expand( $dom, $argsOnly ? PPFrame::NO_TEMPLATES : 0 );
}
2940
/**
 * Record that a parser limitation was hit: adds the warning message
 * "<type>-warning" to the output and the tracking category
 * "<type>-category".
 *
 * @param string $limitationType Prefix of the message and category keys
 * @param string|int|null $current Current value, passed as a numeric message parameter
 * @param string|int|null $max Maximum allowed value, passed as a numeric message parameter
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	$warningKey = "$limitationType-warning";
	$categoryKey = "$limitationType-category";

	# Passing $current and $max is harmless when the message does not use them.
	# Not doing ->inLanguage( $this->mOptions->getUserLangObj() ), since this is shown
	# only during preview, and that would split the parser cache unnecessarily.
	$currentParam = Message::numParam( $current );
	$maxParam = Message::numParam( $max );
	$this->mOutput->addWarningMsg( $warningKey, $currentParam, $maxParam );

	$this->addTrackingCategory( $categoryKey );
}
2979
/**
 * Expand a double-brace construct: a subst/safesubst prefix, a magic
 * variable, a parser function call, or a transclusion (local page,
 * special page, or interwiki).
 *
 * @param array $piece Pieces of the construct. This method reads
 *   'title' (node for the part before the first pipe), 'parts' (argument
 *   node list) and 'lineStart'.
 * @param PPFrame $frame Frame the construct is expanded in
 * @return array Either [ 'text' => ... ] holding the replacement, or
 *   [ 'object' => ... ] holding a value that still needs expansion in the
 *   current frame (also used to hand back the original source when
 *   nothing matched)
 */
public function braceSubstitution( array $piece, PPFrame $frame ) {
	// Flags

	// $text has been filled
	$found = false;
	$text = '';
	// wiki markup in $text should be escaped
	$nowiki = false;
	// $text is HTML, armour it against wikitext transformation
	$isHTML = false;
	// Force interwiki transclusion to be done in raw mode not rendered
	$forceRawInterwiki = false;
	// $text is a DOM node needing expansion in a child frame
	$isChildObj = false;
	// $text is a DOM node needing expansion in the current frame
	$isLocalObj = false;

	# Title object, where $text came from
	$title = false;

	# $part1 is the bit before the first |, and must contain only title characters.
	# Various prefixes will be stripped from it later.
	$titleWithSpaces = $frame->expand( $piece['title'] );
	$part1 = trim( $titleWithSpaces );
	$titleText = false;

	# Original title text preserved for various purposes
	$originalTitle = $part1;

	# $args is a list of argument nodes, starting from index 0, not including $part1
	$args = $piece['parts'];

	$profileSection = null; // profile templates

	$sawDeprecatedTemplateEquals = false; // T91154

	# SUBST
	// @phan-suppress-next-line PhanImpossibleCondition
	if ( !$found ) {
		$substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );
		$part1 = trim( $part1 );

		# Possibilities for substMatch: "subst", "safesubst" or FALSE
		# Decide whether to expand template or keep wikitext as-is.
		if ( $this->ot['wiki'] ) {
			if ( $substMatch === false ) {
				$literal = true; # literal when in PST with no prefix
			} else {
				$literal = false; # expand when in PST with subst: or safesubst:
			}
		} else {
			if ( $substMatch == 'subst' ) {
				$literal = true; # literal when not in PST with plain subst:
			} else {
				$literal = false; # expand when not in PST with safesubst: or no prefix
			}
		}
		if ( $literal ) {
			$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
			$isLocalObj = true;
			$found = true;
		}
	}

	# Variables
	if ( !$found && $args->getLength() == 0 ) {
		$id = $this->mVariables->matchStartToEnd( $part1 );
		if ( $id !== false ) {
			if ( strpos( $part1, ':' ) !== false ) {
				// Emit a deprecation warning for magic variables whose name contains a colon
				wfDeprecatedMsg(
					'Registering a magic variable with a name including a colon',
					'1.39', false, false
				);
			}
			$text = $this->expandMagicVariable( $id, $frame );
			$found = true;
		}
	}

	# MSG, MSGNW and RAW
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw = $this->magicWordFactory->get( 'msgnw' );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg = $this->magicWordFactory->get( 'msg' );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check for RAW:
		$mwRaw = $this->magicWordFactory->get( 'raw' );
		if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
			$forceRawInterwiki = true;
		}
	}

	# Parser functions
	if ( !$found ) {
		$colonPos = strpos( $part1, ':' );
		if ( $colonPos !== false ) {
			$func = substr( $part1, 0, $colonPos );
			$funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
			$argsLength = $args->getLength();
			for ( $i = 0; $i < $argsLength; $i++ ) {
				$funcArgs[] = $args->item( $i );
			}

			$result = $this->callParserFunction( $frame, $func, $funcArgs );

			// Extract any forwarded flags
			if ( isset( $result['title'] ) ) {
				$title = $result['title'];
			}
			if ( isset( $result['found'] ) ) {
				$found = $result['found'];
			}
			if ( array_key_exists( 'text', $result ) ) {
				// a string or null
				$text = $result['text'];
			}
			if ( isset( $result['nowiki'] ) ) {
				$nowiki = $result['nowiki'];
			}
			if ( isset( $result['isHTML'] ) ) {
				$isHTML = $result['isHTML'];
			}
			if ( isset( $result['forceRawInterwiki'] ) ) {
				$forceRawInterwiki = $result['forceRawInterwiki'];
			}
			if ( isset( $result['isChildObj'] ) ) {
				$isChildObj = $result['isChildObj'];
			}
			if ( isset( $result['isLocalObj'] ) ) {
				$isLocalObj = $result['isLocalObj'];
			}
		}
	}

	# Finish mangling title and then check for loops.
	# Set $title to a Title object and $titleText to the PDBK
	if ( !$found ) {
		$ns = NS_TEMPLATE;
		# Split the title into page and subpage
		$subpage = '';
		$relative = Linker::normalizeSubpageLink(
			$this->getTitle(), $part1, $subpage
		);
		if ( $part1 !== $relative ) {
			$part1 = $relative;
			$ns = $this->getTitle()->getNamespace();
		}
		$title = Title::newFromText( $part1, $ns );
		if ( $title ) {
			$titleText = $title->getPrefixedText();
			# Check for language variants if the template is not found
			if ( $this->getTargetLanguageConverter()->hasVariants() && $title->getArticleID() == 0 ) {
				$this->getTargetLanguageConverter()->findVariantLink( $part1, $title, true );
			}
			# Do recursion depth check
			$limit = $this->mOptions->getMaxTemplateDepth();
			if ( $frame->depth >= $limit ) {
				$found = true;
				$text = '<span class="error">'
					. wfMessage( 'parser-template-recursion-depth-warning' )
						->numParams( $limit )->inContentLanguage()->text()
					. '</span>';
			}
		}
	}

	# Load from database
	if ( !$found && $title ) {
		$profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
		if ( !$title->isExternal() ) {
			if ( $title->isSpecialPage()
				&& $this->mOptions->getAllowSpecialInclusion()
				&& $this->ot['html']
			) {
				$specialPage = $this->specialPageFactory->getPage( $title->getDBkey() );
				// Pass the template arguments as URL parameters.
				// "uselang" will have no effect since the Language object
				// is forced to the one defined in ParserOptions.
				$pageArgs = [];
				$argsLength = $args->getLength();
				for ( $i = 0; $i < $argsLength; $i++ ) {
					$bits = $args->item( $i )->splitArg();
					if ( strval( $bits['index'] ) === '' ) {
						$name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
						$value = trim( $frame->expand( $bits['value'] ) );
						$pageArgs[$name] = $value;
					}
				}

				// Create a new context to execute the special page
				$context = new RequestContext;
				$context->setTitle( $title );
				$context->setRequest( new FauxRequest( $pageArgs ) );
				if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
					$context->setUser( $this->userFactory->newFromUserIdentity( $this->getUserIdentity() ) );
				} else {
					// If this page is cached, then we better not be per user.
					$context->setUser( User::newFromName( '127.0.0.1', false ) );
				}
				$context->setLanguage( $this->mOptions->getUserLangObj() );
				$ret = $this->specialPageFactory->capturePath( $title, $context, $this->getLinkRenderer() );
				if ( $ret ) {
					$text = $context->getOutput()->getHTML();
					$this->mOutput->addOutputPageMetadata( $context->getOutput() );
					$found = true;
					$isHTML = true;
					if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
						$this->mOutput->updateRuntimeAdaptiveExpiry(
							$specialPage->maxIncludeCacheTime()
						);
					}
				}
			} elseif ( $this->nsInfo->isNonincludable( $title->getNamespace() ) ) {
				$found = false; # access denied
				$this->logger->debug(
					__METHOD__ .
					": template inclusion denied for " . $title->getPrefixedDBkey()
				);
			} else {
				[ $text, $title ] = $this->getTemplateDom( $title );
				if ( $text !== false ) {
					$found = true;
					$isChildObj = true;
					if (
						$title->getNamespace() === NS_TEMPLATE &&
						$title->getDBkey() === '=' &&
						$originalTitle === '='
					) {
						// Note that we won't get here if `=` is evaluated
						// (in the future) as a parser function, nor if
						// the Template namespace is given explicitly,
						// ie `{{Template:=}}`. Only `{{=}}` triggers.
						$sawDeprecatedTemplateEquals = true; // T91154
					}
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
				$text = "[[:$titleText]]";
				$found = true;
			}
		} elseif ( $title->isTrans() ) {
			# Interwiki transclusion
			if ( $this->ot['html'] && !$forceRawInterwiki ) {
				$text = $this->interwikiTransclude( $title, 'render' );
				$isHTML = true;
			} else {
				$text = $this->interwikiTransclude( $title, 'raw' );
				# Preprocess it like a template
				$text = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
				$isChildObj = true;
			}
			$found = true;
		}

		# Do infinite loop check
		# This has to be done after redirect resolution to avoid infinite loops via redirects
		if ( !$frame->loopCheck( $title ) ) {
			$found = true;
			$text = '<span class="error">'
				. wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
				. '</span>';
			$this->addTrackingCategory( 'template-loop-category' );
			$this->mOutput->addWarningMsg(
				'template-loop-warning',
				Message::plaintextParam( $titleText )
			);
			$this->logger->debug( __METHOD__ . ": template loop broken at '$titleText'" );
		}
	}

	# If we haven't found text to substitute by now, we're done
	# Recover the source wikitext and return it
	if ( !$found ) {
		$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
		if ( $profileSection ) {
			$this->mProfiler->scopedProfileOut( $profileSection );
		}
		return [ 'object' => $text ];
	}

	# Expand DOM-style return values in a child frame
	if ( $isChildObj ) {
		# Clean up argument array
		$newFrame = $frame->newChild( $args, $title );

		if ( $nowiki ) {
			$text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
		} elseif ( $titleText !== false && $newFrame->isEmpty() ) {
			# Expansion is eligible for the empty-frame cache
			$text = $newFrame->cachedExpand( $titleText, $text );
		} else {
			# Uncached expansion
			$text = $newFrame->expand( $text );
		}
	}
	if ( $isLocalObj && $nowiki ) {
		$text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
		$isLocalObj = false;
	}

	if ( $profileSection ) {
		$this->mProfiler->scopedProfileOut( $profileSection );
	}
	if (
		$sawDeprecatedTemplateEquals &&
		$this->mStripState->unstripBoth( $text ) !== '='
	) {
		// T91154: {{=}} is deprecated when it doesn't expand to `=`;
		// use {{Template:=}} if you must.
		$this->addTrackingCategory( 'template-equals-category' );
		$this->mOutput->addWarningMsg( 'template-equals-warning' );
	}

	# Replace raw HTML by a placeholder
	if ( $isHTML ) {
		$text = $this->insertStripItem( $text );
	} elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
		# Escape nowiki-style return values
		$text = wfEscapeWikiText( $text );
	} elseif ( is_string( $text )
		&& !$piece['lineStart']
		&& preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
	) {
		# T2529: if the template begins with a table or block-level
		# element, it should be treated as beginning a new line.
		# This behavior is somewhat controversial.
		$text = "\n" . $text;
	}

	if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
		# Error, oversize inclusion
		if ( $titleText !== false ) {
			# Make a working, properly escaped link if possible (T25588)
			$text = "[[:$titleText]]";
		} else {
			# This will probably not be a working link, but at least it may
			# provide some hint of where the problem is
			$originalTitle = preg_replace( '/^:/', '', $originalTitle );
			$text = "[[:$originalTitle]]";
		}
		$text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
			. 'post-expand include size too large -->' );
		$this->limitationWarn( 'post-expand-template-inclusion' );
	}

	if ( $isLocalObj ) {
		$ret = [ 'object' => $text ];
	} else {
		$ret = [ 'text' => $text ];
	}

	return $ret;
}
3354
/**
 * Look up a parser function by name, invoke its registered callback, and
 * normalize the callback's return value into an array.
 *
 * @param PPFrame $frame Frame used to expand node arguments (and passed to
 *   callbacks registered with SFH_OBJECT_ARGS)
 * @param string $function Function name; tried against the case-sensitive
 *   synonyms first, then lower-cased against the case-insensitive ones
 * @param array $args Arguments. Values may be PPNode objects or plain
 *   values; non-node entries with a key that is not a non-negative integer
 *   are treated as "key=value".
 * @return array Always contains 'found'. When the function exists it also
 *   carries whatever the callback returned ('text' plus any flags); if the
 *   callback set 'noparse' to a false value, 'text' is preprocessed and
 *   'isChildObj' is set to true.
 */
public function callParserFunction( PPFrame $frame, $function, array $args = [] ) {
	# Case sensitive functions first, then case insensitive ones
	$hookId = $this->mFunctionSynonyms[1][$function] ?? null;
	if ( $hookId === null ) {
		$lowered = $this->contLang->lc( $function );
		$hookId = $this->mFunctionSynonyms[0][$lowered] ?? null;
		if ( $hookId === null ) {
			return [ 'found' => false ];
		}
	}

	[ $callback, $flags ] = $this->mFunctionHooks[$hookId];

	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Object-style callback: ( parser, frame, array of nodes ).
		# Element 0 and existing PPNodes are passed as-is; anything else is
		# converted to a part node first.
		$nodeArgs = [];
		foreach ( $args as $key => $arg ) {
			$nodeArgs[] = ( $arg instanceof PPNode || $key === 0 )
				? $arg
				: $this->mPreprocessor->newPartNodeArray( [ $key => $arg ] )->item( 0 );
		}
		$callArgs = [ $this, $frame, $nodeArgs ];
	} else {
		# Plain-style callback: ( parser, trimmed string, trimmed string, ... )
		$callArgs = [ $this ];
		foreach ( $args as $key => $arg ) {
			if ( $arg instanceof PPNode ) {
				$callArgs[] = trim( $frame->expand( $arg ) );
			} elseif ( is_int( $key ) && $key >= 0 ) {
				$callArgs[] = trim( $arg );
			} else {
				$callArgs[] = trim( "$key=$arg" );
			}
		}
	}

	$result = $callback( ...$callArgs );

	# Function hooks may return a wikitext string or an array holding the
	# string and any flags; bring both forms into the array shape.
	if ( is_array( $result ) ) {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		$result += [ 'found' => true ];
	} else {
		$result = [
			'found' => true,
			'text' => $result,
		];
	}

	$noparse = $result['noparse'] ?? true;
	if ( !$noparse ) {
		$preprocessFlags = $result['preprocessFlags'] ?? 0;
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3453
/**
 * Get the preprocessed DOM of a template, using the per-parser DOM cache
 * and the redirect cache.
 *
 * @param LinkTarget $title Template to load
 * @return array Two elements: the DOM (or false when no text could be
 *   fetched) and the title the text was finally loaded from
 */
public function getTemplateDom( LinkTarget $title ) {
	$requested = $title;
	$key = CacheKeyHelper::getKeyForPage( $title );

	# Follow a previously recorded redirect for this title, if any
	if ( isset( $this->mTplRedirCache[$key] ) ) {
		$redirect = $this->mTplRedirCache[$key];
		$title = Title::makeTitle( $redirect[0], $redirect[1] );
		$key = CacheKeyHelper::getKeyForPage( $title );
	}

	# isset() is also true for a cached false, so misses are served from cache too
	if ( isset( $this->mTplDomCache[$key] ) ) {
		return [ $this->mTplDomCache[$key], $title ];
	}

	# Cache miss, go to the database
	$fetched = $this->fetchTemplateAndTitle( $title );
	$wikitext = $fetched[0];
	$title = $fetched[1];

	if ( $wikitext === false ) {
		$this->mTplDomCache[$key] = false;
		return [ false, $title ];
	}

	$dom = $this->preprocessToDom( $wikitext, Preprocessor::DOM_FOR_INCLUSION );
	$this->mTplDomCache[$key] = $dom;

	# Remember where the requested title ended up when it differs
	if ( !$title->isSamePageAs( $requested ) ) {
		$requestedKey = CacheKeyHelper::getKeyForPage( $requested );
		$this->mTplRedirCache[$requestedKey] = [ $title->getNamespace(), $title->getDBkey() ];
	}

	return [ $dom, $title ];
}
3494
/**
 * Fetch the current revision of a page through the callback configured in
 * the parser options, caching the result in a 100-entry LRU map.
 *
 * NOTE(review): the declaration line was missing from this listing; the
 * name matches the call made in statelessFetchTemplate() and the parameter
 * name matches the body. Confirm visibility against upstream.
 *
 * @param LinkTarget $link Page to look up
 * @return mixed|null Value returned by the callback, with false normalized
 *   to null (presumably a RevisionRecord — confirm against the callback contract)
 */
public function fetchCurrentRevisionRecordOfTitle( LinkTarget $link ) {
	$cacheKey = CacheKeyHelper::getKeyForPage( $link );
	if ( !$this->currentRevisionCache ) {
		$this->currentRevisionCache = new MapCacheLRU( 100 );
	}
	if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
		$title = Title::newFromLinkTarget( $link ); // hook signature compat
		$revisionRecord =
			// Defaults to Parser::statelessFetchRevisionRecord()
			call_user_func(
				$this->mOptions->getCurrentRevisionRecordCallback(),
				$title,
				$this
			);
		if ( $revisionRecord === false ) {
			// Parser::statelessFetchRevisionRecord() can return false;
			// normalize it to null.
			$revisionRecord = null;
		}
		$this->currentRevisionCache->set( $cacheKey, $revisionRecord );
	}
	return $this->currentRevisionCache->get( $cacheKey );
}
3531
/**
 * Tell whether the current-revision cache already holds an entry for the
 * given page.
 *
 * NOTE(review): the declaration line was missing from this listing; the
 * method name and visibility are taken from upstream MediaWiki and should
 * be confirmed. The parameter name matches the body.
 *
 * @param LinkTarget $link Page to check
 * @return bool
 */
public function isCurrentRevisionOfTitleCached( LinkTarget $link ) {
	$key = CacheKeyHelper::getKeyForPage( $link );
	return (
		$this->currentRevisionCache &&
		$this->currentRevisionCache->has( $key )
	);
}
3545
/**
 * Look up the known current revision of a page via the RevisionLookup
 * service, without using any parser state.
 *
 * @param LinkTarget $link Page to look up
 * @param Parser|null $parser Not used by this implementation
 * @return mixed Result of RevisionLookup::getKnownCurrentRevision()
 *   (callers in this class treat false as "no revision")
 */
public static function statelessFetchRevisionRecord( LinkTarget $link, $parser = null ) {
	// A LinkTarget that is already a PageIdentity (probably a Title) is used as-is.
	// XXX: otherwise use RevisionStore::getPageForLink()!
	//      ...but get the info for the current revision at the same time?
	//      Should RevisionStore::getKnownCurrentRevision accept a LinkTarget?
	$page = $link instanceof PageIdentity
		? $link
		: Title::newFromLinkTarget( $link );

	$revisionLookup = MediaWikiServices::getInstance()->getRevisionLookup();

	return $revisionLookup->getKnownCurrentRevision( $page );
}
3570
/**
 * Fetch the text of a template through the template callback from the
 * parser options and register every dependency it reports on the output.
 *
 * @param LinkTarget $link Template to fetch
 * @return array Two elements: the text (U+007F replaced by "?" when it is
 *   a string; otherwise whatever the callback returned) and the final title
 */
public function fetchTemplateAndTitle( LinkTarget $link ) {
	// Use Title for compatibility with callbacks and return type
	$title = Title::newFromLinkTarget( $link );

	// Defaults to Parser::statelessFetchTemplate()
	$fetch = $this->mOptions->getTemplateCallback();
	$fetched = $fetch( $title, $this );

	$revRecord = $fetched['revision-record'] ?? null;
	$finalTitle = $fetched['finalTitle'] ?? $title;

	$text = $fetched['text'];
	if ( is_string( $text ) ) {
		// We use U+007F DELETE to distinguish strip markers from regular text
		$text = strtr( $text, "\x7f", "?" );
	}

	$deps = $fetched['deps'] ?? [];
	foreach ( $deps as $dep ) {
		$this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );

		$isSelfTransclusion = $dep['title']->equals( $this->getTitle() )
			&& $revRecord instanceof RevisionRecord;
		if ( !$isSelfTransclusion ) {
			continue;
		}

		// Self-transclusion; final result may change based on the new page version
		try {
			$sha1 = $revRecord->getSha1();
		} catch ( RevisionAccessException $e ) {
			$sha1 = null;
		}
		$this->setOutputFlag( ParserOutputFlags::VARY_REVISION_SHA1, 'Self transclusion' );
		$this->getOutput()->setRevisionUsedSha1Base36( $sha1 );
	}

	return [ $text, $finalTitle ];
}
3608
/**
 * Fetch the wikitext of a template, following up to two redirects, and
 * collect the pages it depended on along the way.
 *
 * @param LinkTarget $page Template to fetch (converted with Title::castFromLinkTarget())
 * @param Parser|false $parser Parser whose title is used as hook context and
 *   whose current-revision cache is used for lookups; when false, the
 *   RevisionLookup service is queried directly
 * @return array With keys:
 *   - 'revision-record': the last revision record looked at, or false
 *   - 'text': the wikitext, or false when none could be loaded
 *   - 'finalTitle': the title the text was loaded from
 *   - 'deps': list of [ 'title', 'page_id', 'rev_id' ] arrays
 */
public static function statelessFetchTemplate( $page, $parser = false ) {
	$title = Title::castFromLinkTarget( $page ); // for compatibility with return type
	$text = $skip = false;
	$finalTitle = $title;
	$deps = [];
	$revRecord = null;
	$contextTitle = $parser ? $parser->getTitle() : null;

	$services = MediaWikiServices::getInstance();
	# Loop to fetch the article, with up to 2 redirects
	$revLookup = $services->getRevisionLookup();
	$hookRunner = new HookRunner( $services->getHookContainer() );
	for ( $i = 0; $i < 3 && is_object( $title ); $i++ ) {
		# Give extensions a chance to select the revision instead
		$revRecord = null; # Assume no hook
		$origTitle = $title;
		$titleChanged = false;
		$hookRunner->onBeforeParserFetchTemplateRevisionRecord(
			# The $title is not a PageIdentity, as it may
			# contain fragments or even represent an attempt to transclude
			# a broken or otherwise-missing Title, which the hook may
			# fix up. Similarly, the $contextTitle may represent a special
			# page or other page which "exists" as a parsing context but
			# is not in the DB.
			$contextTitle, $title,
			$skip, $revRecord
		);

		if ( $skip ) {
			$text = false;
			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null
			];
			break;
		}
		# Get the current revision unless the hook supplied one
		if ( !$revRecord ) {
			if ( $parser ) {
				$revRecord = $parser->fetchCurrentRevisionRecordOfTitle( $title );
			} else {
				$revRecord = $revLookup->getRevisionByTitle( $title );
			}
		}
		if ( $revRecord ) {
			# Update title, as $revRecord may have been changed by hook
			$title = Title::newFromLinkTarget(
				$revRecord->getPageAsLinkTarget()
			);
			$deps[] = [
				'title' => $title,
				'page_id' => $revRecord->getPageId(),
				'rev_id' => $revRecord->getId(),
			];
		} else {
			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null,
			];
		}
		if ( !$title->equals( $origTitle ) ) {
			# If we fetched a rev from a different title, register
			# the original title too...
			$deps[] = [
				'title' => $origTitle,
				'page_id' => $origTitle->getArticleID(),
				'rev_id' => null,
			];
			$titleChanged = true;
		}
		# If there is no current revision, there is no page
		if ( $revRecord === null || $revRecord->getId() === null ) {
			$linkCache = $services->getLinkCache();
			$linkCache->addBadLinkObj( $title );
		}
		if ( $revRecord ) {
			if ( $titleChanged && !$revRecord->hasSlot( SlotRecord::MAIN ) ) {
				// We've added this (missing) title to the dependencies;
				// give the hook another chance to redirect it to an
				// actual page.
				$text = false;
				$finalTitle = $title;
				continue;
			}
			if ( $revRecord->hasSlot( SlotRecord::MAIN ) ) { // T276476
				$content = $revRecord->getContent( SlotRecord::MAIN );
				$text = $content ? $content->getWikitextForTransclusion() : null;
			} else {
				$text = false;
			}

			if ( $text === false || $text === null ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() === NS_MEDIAWIKI ) {
			$message = wfMessage( $services->getContentLanguage()->
				lcfirst( $title->getText() ) )->inContentLanguage();
			if ( !$message->exists() ) {
				$text = false;
				break;
			}
			$text = $message->plain();
			break;
		} else {
			break;
		}
		// @phan-suppress-next-line PhanPossiblyUndeclaredVariable Only reached when content is set
		if ( !$content ) {
			break;
		}
		# Redirect?
		$finalTitle = $title;
		$title = $content->getRedirectTarget();
	}

	$retValues = [
		// previously, when this also returned a Revision object, we set
		// 'revision-record' to false instead of null if it was unavailable,
		// so that callers could use isset and then rely on the revision-record
		// key instead of the revision key, even if there was no corresponding
		// object - we continue to set to false here for backwards compatibility
		'revision-record' => $revRecord ?: false,
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps
	];
	return $retValues;
}
3754
/**
 * Fetch a file and register it as an image dependency of the output.
 *
 * When the found file's title differs from the requested link, the file's
 * own title is registered as well and returned instead.
 *
 * @param LinkTarget $link File to fetch
 * @param array $options Passed through to fetchFileNoRegister()
 * @return array Two elements: the file (falsy when not found) and a Title
 */
public function fetchFileAndTitle( LinkTarget $link, array $options = [] ) {
	$file = $this->fetchFileNoRegister( $link, $options );

	if ( $file ) {
		$time = $file->getTimestamp();
		$sha1 = $file->getSha1();
	} else {
		$time = false;
		$sha1 = false;
	}

	# Register the file as a dependency...
	$this->mOutput->addImage( $link, $time, $sha1 );

	if ( $file && !$link->isSameLinkAs( $file->getTitle() ) ) {
		# Update fetched file title after resolving redirects, etc.
		$link = $file->getTitle();
		$this->mOutput->addImage( $link, $time, $sha1 );
	}

	// Convert to Title for return type compat
	return [ $file, Title::newFromLinkTarget( $link ) ];
}
3779
/**
 * Look up a file in the repo group without registering it on the output.
 *
 * @param LinkTarget $link File to find (used for lookup by name)
 * @param array $options Lookup options. 'broken' forces a false result;
 *   'sha1' switches to lookup by key. The whole array is forwarded to the
 *   repo group.
 * @return mixed Result of the repo group lookup, or false when 'broken' is set
 */
protected function fetchFileNoRegister( LinkTarget $link, array $options = [] ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		return false;
	}

	$repoGroup = MediaWikiServices::getInstance()->getRepoGroup();

	if ( isset( $options['sha1'] ) ) {
		// get by (sha1,timestamp)
		return $repoGroup->findFileFromKey( $options['sha1'], $options );
	}

	// get by (name,timestamp)
	return $repoGroup->findFile( $link, $options );
}
3803
/**
 * Fetch the content of a page on another wiki over HTTP, going through the
 * WAN cache, and return either the fetched text or an error message.
 *
 * @param LinkTarget $link Page to transclude
 * @param string $action Value of the "action" URL parameter (this class
 *   passes 'render' or 'raw')
 * @return string Fetched text, or a content-language message when
 *   transclusion is disabled, the URL exceeds 1024 bytes, or the fetch failed
 */
public function interwikiTransclude( LinkTarget $link, $action ) {
	if ( !$this->svcOptions->get( MainConfigNames::EnableScaryTranscluding ) ) {
		return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
	}

	// TODO: extract relevant functionality from Title
	$title = Title::newFromLinkTarget( $link );

	$url = $title->getFullURL( [ 'action' => $action ] );
	if ( strlen( $url ) > 1024 ) {
		return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
	}

	// remote wiki ID or false
	$wikiId = $title->getTransWikiID();
	$hasWikiId = $wikiId !== false;

	$fname = __METHOD__;
	$cache = $this->wanCache;

	$cacheKey = $cache->makeGlobalKey(
		'interwiki-transclude',
		$hasWikiId ? $wikiId : 'external',
		sha1( $url )
	);
	$expiry = $this->svcOptions->get( MainConfigNames::TranscludeCacheExpiry );

	$fetchRemote = function ( $oldValue, &$ttl ) use ( $url, $fname, $cache ) {
		$req = $this->httpRequestFactory->create( $url, [], $fname );

		$status = $req->execute(); // Status object
		if ( $status->isOK() ) {
			if ( $req->getResponseHeader( 'X-Database-Lagged' ) !== null ) {
				$ttl = min( $cache::TTL_LAGGED, $ttl );
			}
			$body = $req->getContent();
		} else {
			// Failed fetches are not cached
			$ttl = $cache::TTL_UNCACHEABLE;
			$body = null;
		}

		return [
			'text' => $body,
			'code' => $req->getStatus()
		];
	};

	$cacheOptions = [
		'checkKeys' => $hasWikiId
			? [ $cache->makeGlobalKey( 'interwiki-page', $wikiId, $title->getDBkey() ) ]
			: [],
		'pcGroup' => 'interwiki-transclude:5',
		'pcTTL' => $cache::TTL_PROC_LONG
	];

	$data = $cache->getWithSetCallback( $cacheKey, $expiry, $fetchRemote, $cacheOptions );

	if ( is_string( $data['text'] ) ) {
		return $data['text'];
	}

	if ( $data['code'] != 200 ) {
		// Though we failed to fetch the content, this status is useless.
		return wfMessage( 'scarytranscludefailed-httpstatus' )
			->params( $url, $data['code'] )->inContentLanguage()->text();
	}

	return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
}
3874
/**
 * Resolve a triple-brace argument ({{{name|default}}}) against a frame.
 *
 * @param array $piece Must contain 'title' (expandable name node) and
 *   'parts' (node list holding the optional default)
 * @param PPFrame $frame Frame whose arguments are consulted
 * @return array Either [ 'object' => ... ] with unexpanded nodes, or
 *   [ 'text' => ... ] with the argument text
 */
public function argSubstitution( array $piece, PPFrame $frame ) {
	$parts = $piece['parts'];
	$rawName = $frame->expand( $piece['title'] );
	$text = $frame->getArgument( trim( $rawName ) );
	$unmatched = ( $text === false );
	$object = false;

	if ( $unmatched && $parts->getLength() > 0
		&& ( $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() )
		)
	) {
		# No match in frame, use the supplied default
		$object = $parts->item( 0 )->getChildren();
	}

	$withinLimit = $this->incrementIncludeSize( 'arg', strlen( $text ) );
	if ( !$withinLimit ) {
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $unmatched && $object === false ) {
		# No match anywhere
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $rawName, $parts );
	}

	if ( $object !== false ) {
		return [ 'object' => $object ];
	}

	if ( !$withinLimit ) {
		$text .= '<!-- WARNING: argument omitted, expansion size too large -->';
	}

	return [ 'text' => $text ];
}
3920
/**
 * Ask the Parsoid site configuration whether the given tag needs nowiki
 * stripped in the tag parser function.
 *
 * @param string $lowerTagName Tag name, passed through unchanged
 *   (presumably already lower-cased by the caller)
 * @return bool
 */
public function tagNeedsNowikiStrippedInTagPF( string $lowerTagName ): bool {
	return MediaWikiServices::getInstance()
		->getParsoidSiteConfig()
		->tagNeedsNowikiStrippedInTagPF( $lowerTagName );
}
3929
/**
 * Replace an extension tag with either a strip marker or its raw output.
 *
 * @param array $params Pieces of the tag:
 *   - 'name': expandable tag name (required)
 *   - 'attr': expandable attribute text (optional)
 *   - 'inner': expandable tag content; absent for a self-closed tag
 *   - 'close': expandable closing tag; may be absent or null
 *   - 'attributes': extra attributes passed via {{#tag:}} (optional)
 * @param PPFrame $frame Frame used to expand the pieces
 * @param bool $processNowiki When true, <nowiki> is run through its tag
 *   hook even when the output type is not HTML
 * @return string A strip marker, the tag output itself when the hook
 *   requests marker type 'none', or an expansion error punted upwards
 * @throws UnexpectedValueException If a tag hook returns an unknown marker type
 */
public function extensionSubstitution( array $params, PPFrame $frame, bool $processNowiki = false ) {
	static $errorStr = '<span class="error">';

	$name = $frame->expand( $params['name'] );
	if ( str_starts_with( $name, $errorStr ) ) {
		// Probably expansion depth or node count exceeded. Just punt the
		// error up.
		return $name;
	}

	// Parse attributes from XML-like wikitext syntax
	$attrText = !isset( $params['attr'] ) ? '' : $frame->expand( $params['attr'] );
	if ( str_starts_with( $attrText, $errorStr ) ) {
		// See above
		return $attrText;
	}

	// We can't safely check if the expansion for $content resulted in an
	// error, because the content could happen to be the error string
	// (T149622).
	$content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );

	$marker = self::MARKER_PREFIX . "-$name-"
		. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	$normalizedName = strtolower( $name );
	$isNowiki = $normalizedName === 'nowiki';
	$markerType = $isNowiki ? 'nowiki' : 'general';
	if ( $this->ot['html'] || ( $processNowiki && $isNowiki ) ) {
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		// Merge in attributes passed via {{#tag:}} parser function
		if ( isset( $params['attributes'] ) ) {
			$attributes += $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$normalizedName] ) ) {
			// Note that $content may be null here, for example if the
			// tag is self-closed.
			$output = call_user_func_array( $this->mTagHooks[$normalizedName],
				[ $content, $attributes, $this, $frame ] );
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $normalizedName ) . '</span>';
		}

		if ( is_array( $output ) ) {
			// Extract flags
			$flags = $output;
			$output = $flags[0];
			if ( isset( $flags['markerType'] ) ) {
				$markerType = $flags['markerType'];
			}
		}
	} else {
		// We're substituting a {{subst:#tag:}} parser function.
		// Convert the attributes it passed into the XML-like string.
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $this->getStripState()->unstripBoth( $attrValue ), ENT_COMPAT ) . '"';
			}
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			// 'close' is optional just like 'attr' and 'inner': use isset() so
			// that a missing key is treated like null instead of raising an
			// "undefined array key" warning.
			$close = isset( $params['close'] ) ? $frame->expand( $params['close'] ) : '';
			if ( str_starts_with( $close, $errorStr ) ) {
				// See above
				return $close;
			}
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	} elseif ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new UnexpectedValueException( __METHOD__ . ': invalid marker type' );
	}
	return $marker;
}
4034
/**
 * Add to the running include size of the given type, unless doing so
 * would exceed the configured maximum include size.
 *
 * @param string $type Key into $this->mIncludeSizes
 * @param int $size Amount to add
 * @return bool False (counter left untouched) if the limit would be
 *   exceeded, true once the counter has been increased
 */
private function incrementIncludeSize( $type, $size ) {
	$newTotal = $this->mIncludeSizes[$type] + $size;
	if ( $newTotal > $this->mOptions->getMaxIncludeSize() ) {
		return false;
	}

	$this->mIncludeSizes[$type] = $newTotal;
	return true;
}
4050
4056 $this->mExpensiveFunctionCount++;
4057 return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
4058 }
4059
/**
 * Strip double-underscore magic words out of the text, recording what
 * was found on the parser state and in the parser output.
 *
 * @param string $text
 * @return string Text with the magic words removed and the first __TOC__
 *   replaced by the TOC placeholder
 */
private function handleDoubleUnderscore( $text ) {
	# The position of __TOC__ needs to be recorded
	$tocWord = $this->magicWordFactory->get( 'toc' );
	if ( $tocWord->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# Turn the first occurrence into a placeholder (filled in with the
		# TOC at the end) and drop every other occurrence.
		$withPlaceholder = $tocWord->replace( self::TOC_PLACEHOLDER, $text, 1 );
		$text = $tocWord->replace( '', $withPlaceholder );

		# For consistency with all other double-underscores
		# (see below)
		$this->mOutput->setPageProperty( 'toc', '' );
	}

	# Now match and remove the rest of them
	$allWords = $this->magicWordFactory->getDoubleUnderscoreArray();
	$this->mDoubleUnderscores = $allWords->matchAndRemove( $text );

	// Reads the live property on every call, exactly like direct isset() checks
	$seen = function ( $word ) {
		return isset( $this->mDoubleUnderscores[$word] );
	};

	if ( $seen( 'nogallery' ) ) {
		$this->mOutput->setNoGallery( true );
	}
	if ( $seen( 'notoc' ) && !$this->mForceTocPosition ) {
		$this->mShowToc = false;
	}
	if ( $seen( 'hiddencat' ) && $this->getTitle()->getNamespace() === NS_CATEGORY ) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}

	# (T10068) Allow control over whether robots index a page.
	# __INDEX__ always overrides __NOINDEX__, see T16899
	if ( $seen( 'noindex' ) && $this->getTitle()->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'noindex' );
		$this->addTrackingCategory( 'noindex-category' );
	}
	if ( $seen( 'index' ) && $this->getTitle()->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'index' );
		$this->addTrackingCategory( 'index-category' );
	}

	# Cache all double underscores in the database
	foreach ( $this->mDoubleUnderscores as $word => $unused ) {
		$this->mOutput->setPageProperty( $word, '' );
	}

	return $text;
}
4117
/**
 * Add a tracking category to the current parser output by delegating to
 * the tracking categories service.
 *
 * @param string $msg Identifier handed to the service (presumably a
 *   message key naming the category)
 * @return mixed Whatever the service returns
 */
public function addTrackingCategory( $msg ) {
	$page = $this->getPage();

	return $this->trackingCategories->addTrackingCategory( $this->mOutput, $msg, $page );
}
4129
/**
 * Build a Message in the parser's target language, with the page being
 * parsed as its context page.
 *
 * @param string $msg Message key
 * @param mixed ...$args Message parameters, forwarded to wfMessage()
 * @return Message
 */
public function msg( string $msg, ...$args ): Message {
	$localized = wfMessage( $msg, ...$args )->inLanguage( $this->getTargetLanguage() );

	return $localized->page( $this->getPage() );
}
4148
/**
 * Reduce the children of a heading's DOM to what is allowed in a table
 * of contents line. The container is modified in place.
 *
 * - <style> and <script> elements are removed together with their content.
 * - Allowed elements are kept but lose their attributes (except
 *   dir="rtl|ltr" on <span>), and their content is cleaned recursively.
 * - Any other element is unwrapped: its children take its place and are
 *   then cleaned themselves.
 * - Comments are removed.
 *
 * @param Node $container Element or DocumentFragment whose children are cleaned
 */
private function cleanUpTocLine( Node $container ) {
	'@phan-var Element|DocumentFragment $container'; // @var Element|DocumentFragment $container
	# Strip out HTML
	# Allowed tags are:
	# * <sup> and <sub> (T10393)
	# * <i> (T28375)
	# * <b> (r105284)
	# * <bdi> (T74884)
	# * <span dir="rtl"> and <span dir="ltr"> (T37167)
	# * <s> and <strike> (T35715)
	# * <q> (T251672)
	# We strip any parameter from accepted tags, except dir="rtl|ltr" from <span>,
	# to allow setting directionality in toc items.
	$allowedTags = [ 'span', 'sup', 'sub', 'bdi', 'i', 'b', 's', 'strike', 'q' ];
	$node = $container->firstChild;
	while ( $node !== null ) {
		// Remember the successor now: $node may be detached below
		$next = $node->nextSibling;
		if ( $node instanceof Element ) {
			$nodeName = DOMCompat::nodeName( $node );
			if ( in_array( $nodeName, [ 'style', 'script' ], true ) ) {
				# Remove any <style> or <script> tags (T198618)
				DOMCompat::remove( $node );
			} elseif ( in_array( $nodeName, $allowedTags, true ) ) {
				// Keep tag, remove attributes.
				// Collect first, remove afterwards, so the attribute list is
				// not modified while it is being iterated.
				$removeAttrs = [];
				foreach ( $node->attributes as $attr ) {
					if (
						$nodeName === 'span' && $attr->name === 'dir'
						&& ( $attr->value === 'rtl' || $attr->value === 'ltr' )
					) {
						// Keep <span dir="rtl"> and <span dir="ltr">
						continue;
					}
					$removeAttrs[] = $attr;
				}
				foreach ( $removeAttrs as $attr ) {
					$node->removeAttributeNode( $attr );
				}
				$this->cleanUpTocLine( $node );
				# Strip '<span></span>', which is the result from the above if
				# <span id="foo"></span> is used to produce an additional anchor
				# for a section.
				if ( $nodeName === 'span' && !$node->hasChildNodes() ) {
					DOMCompat::remove( $node );
				}
			} else {
				// Strip tag: hoist the children into the parent and continue
				// with the first of them, so they are cleaned as well. If the
				// element is empty, firstChild is null; fall back to the
				// sibling remembered above, otherwise the loop would stop and
				// leave the rest of the line uncleaned.
				$next = $node->firstChild ?? $next;
				while ( $childNode = $node->firstChild ) {
					$node->parentNode->insertBefore( $childNode, $node );
				}
				DOMCompat::remove( $node );
			}
		} elseif ( $node instanceof Comment ) {
			// Extensions may add comments to headings;
			// these shouldn't appear in the ToC either.
			DOMCompat::remove( $node );
		}
		$node = $next;
	}
}
4210
/**
 * Post-process the <h1>-<h6> headings in already-generated HTML.
 *
 * For every heading found in $text this computes the TOC line, the
 * anchors and (where possible) the codepoint offset of the heading in
 * $origText, records them in a TOCData object, and rebuilds the heading
 * tag with data-mw-anchor attributes plus an optional <mw:editsection>
 * placeholder. The output type is temporarily switched to OT_WIKI while
 * headings are processed and restored afterwards.
 *
 * @param string $text HTML in which headings are searched and replaced
 * @param string $origText Text that is preprocessed to a DOM in order to
 *   locate headings and compute their codepoint offsets
 * @param bool $isMain When true (and the TOC is not suppressed by the
 *   parser options), the TOC data and TOC-related flags are stored on
 *   the parser output
 * @return string $text with every heading replaced by its rebuilt form,
 *   and the TOC placeholder appended to the leading block when a TOC is
 *   wanted but no explicit position was forced
 */
private function finalizeHeadings( $text, $origText, $isMain = true ) {
	# Inhibit editsection links if requested in the page
	if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
		$maybeShowEditLink = false;
	} else {
		$maybeShowEditLink = true; /* Actual presence will depend on post-cache transforms */
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# NOTE: white space in headings have been trimmed in handleHeadings. They shouldn't
	# be trimmed here since whitespace in HTML headings is significant.
	$matches = [];
	$numMatches = preg_match_all(
		'/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
		$text,
		$matches
	);

	# if there are fewer than 4 headlines in the article, do not show TOC
	# unless it's been explicitly enabled.
	$enoughToc = $this->mShowToc &&
		( ( $numMatches >= 4 ) || $this->mForceTocPosition );

	# Allow user to stipulate that a page should have a "new section"
	# link added via __NEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
		$this->mOutput->setNewSection( true );
	}

	# Allow user to remove the "new section"
	# link via __NONEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
		$this->mOutput->setHideNewSection( true );
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC above first header
	if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
		$this->mShowToc = true;
		$enoughToc = true;
	}

	# headline counter
	$headlineCount = 0;
	$haveTocEntries = false;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$full = '';
	$head = [];
	$level = 0;
	$tocData = new TOCData();
	$markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
	$baseTitleText = $this->getTitle()->getPrefixedDBkey();
	$oldType = $this->mOutputType;
	$this->setOutputType( self::OT_WIKI );
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $origText );
	$node = $root->getFirstChild();
	// Running codepoint offset into $origText; advanced as DOM nodes are
	// walked past while looking for each heading
	$cpOffset = 0;
	// Lower-cased anchors already handed out, used to keep IDs unique
	$refers = [];

	// Index 3 is the same capture as the named group 'header'
	$headlines = $numMatches !== false ? $matches[3] : [];

	$maxTocLevel = $this->svcOptions->get( MainConfigNames::MaxTocLevel );
	$domDocument = DOMUtils::parseHTML( '' );
	foreach ( $headlines as $headline ) {
		$isTemplate = false;
		$titleText = false;
		$sectionIndex = false;
		$markerMatches = [];
		// A leading heading marker carries the serial number under which
		// the heading's source title and section index were stored
		if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
			$serial = (int)$markerMatches[1];
			[ $titleText, $sectionIndex ] = $this->mHeadings[$serial];
			$isTemplate = ( $titleText != $baseTitleText );
			$headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
		}

		$sectionMetadata = SectionMetadata::fromLegacy( [
			"fromtitle" => $titleText ?: null,
			"index" => $sectionIndex === false
				? '' : ( ( $isTemplate ? 'T-' : '' ) . $sectionIndex )
		] );
		$tocData->addSection( $sectionMetadata );

		$oldLevel = $level;
		$level = (int)$matches[1][$headlineCount];
		$tocData->processHeading( $oldLevel, $level, $sectionMetadata );

		if ( $tocData->getCurrentTOCLevel() < $maxTocLevel ) {
			$haveTocEntries = true;
		}

		# The safe header is a version of the header text safe to use for links

		# Remove link placeholders by the link text.
		#     <!--LINK number-->
		# turns into
		#     link text with suffix
		# Do this before unstrip since link text can contain strip markers
		$safeHeadline = $this->replaceLinkHoldersText( $headline );

		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );

		// Run Tidy to convert wikitext entities to HTML entities (T355386),
		// conveniently also giving us a way to handle French spaces (T324763)
		$safeHeadline = $this->tidy->tidy( $safeHeadline, [ Sanitizer::class, 'armorFrenchSpaces' ] );

		// Parse the heading contents as HTML. This makes it easier to strip out some HTML tags,
		// and ensures that we generate balanced HTML at the end (T218330).
		$headlineDom = DOMUtils::parseHTMLToFragment( $domDocument, $safeHeadline );

		$this->cleanUpTocLine( $headlineDom );

		// Serialize back to HTML
		$tocline = trim( DOMUtils::getFragmentInnerHTML( $headlineDom ) );

		# For the anchor, strip out HTML-y stuff period
		$safeHeadline = trim( $headlineDom->textContent );
		# Save headline for section edit hint before it's normalized for the link
		$headlineHint = htmlspecialchars( $safeHeadline );

		$safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );
		$safeHeadline = self::normalizeSectionName( $safeHeadline );

		$fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
		$linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
		$safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
		if ( $fallbackHeadline === $safeHeadline ) {
			# No reason to have both (in fact, we can't)
			$fallbackHeadline = false;
		}

		# HTML IDs must be case-insensitively unique for IE compatibility (T12721).
		$arrayKey = strtolower( $safeHeadline );
		if ( $fallbackHeadline === false ) {
			$fallbackArrayKey = false;
		} else {
			$fallbackArrayKey = strtolower( $fallbackHeadline );
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $safeHeadline;
		$fallbackAnchor = $fallbackHeadline;
		if ( isset( $refers[$arrayKey] ) ) {
			// Empty-bodied loop: find the first unused "_N" suffix, starting at 2
			for ( $i = 2; isset( $refers["{$arrayKey}_$i"] ); ++$i );
			$anchor .= "_$i";
			$linkAnchor .= "_$i";
			$refers["{$arrayKey}_$i"] = true;
		} else {
			$refers[$arrayKey] = true;
		}
		// NOTE(review): when there is no fallback, $fallbackArrayKey is false
		// and the else branch stores $refers[false] (i.e. key 0) — confirm
		// this is harmless/intended.
		if ( $fallbackHeadline !== false && isset( $refers[$fallbackArrayKey] ) ) {
			for ( $i = 2; isset( $refers["{$fallbackArrayKey}_$i"] ); ++$i );
			$fallbackAnchor .= "_$i";
			$refers["{$fallbackArrayKey}_$i"] = true;
		} else {
			$refers[$fallbackArrayKey] = true;
		}

		# Add the section to the section tree
		# Find the DOM node for this header
		$noOffset = ( $isTemplate || $sectionIndex === false );
		while ( $node && !$noOffset ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					break;
				}
			}
			// Not the heading we are looking for: add the length (in
			// codepoints) of this node's original text and move on
			$cpOffset += mb_strlen(
				$this->mStripState->unstripBoth(
					$frame->expand( $node, PPFrame::RECOVER_ORIG )
				)
			);
			$node = $node->getNextSibling();
		}
		$sectionMetadata->line = $tocline;
		$sectionMetadata->codepointOffset = ( $noOffset ? null : $cpOffset );
		$sectionMetadata->anchor = $anchor;
		$sectionMetadata->linkAnchor = $linkAnchor;

		if ( $maybeShowEditLink && $sectionIndex !== false ) {
			// Output edit section links as markers with styles that can be customized by skins
			if ( $isTemplate ) {
				# Put a T flag in the section identifier, to indicate to extractSections()
				# that sections inside <includeonly> should be counted.
				$editsectionPage = $titleText;
				$editsectionSection = "T-$sectionIndex";
			} else {
				$editsectionPage = $this->getTitle()->getPrefixedText();
				$editsectionSection = $sectionIndex;
			}
			// Construct a pseudo-HTML tag as a placeholder for the section edit link. It is replaced in
			// MediaWiki\OutputTransform\Stages\HandleSectionLinks with the real link.
			//
			// Any HTML markup in the input has already been escaped,
			// so we don't have to worry about a user trying to input one of these markers directly.
			//
			// We put the page and section in attributes to stop the language converter from
			// converting them, but put the headline hint in tag content
			// because it is supposed to be able to convert that.
			$editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage, ENT_COMPAT );
			$editlink .= '" section="' . htmlspecialchars( $editsectionSection, ENT_COMPAT ) . '"';
			$editlink .= '>' . $headlineHint . '</mw:editsection>';
		} else {
			$editlink = '';
		}
		// Reconstruct the original <h#> tag with added attributes. It is replaced in
		// MediaWiki\OutputTransform\Stages\HandleSectionLinks to add anchors and stuff.
		//
		// data-mw-... attributes are forbidden in Sanitizer::isReservedDataAttribute(),
		// so we don't have to worry about a user trying to input one of these markers directly.
		//
		// We put the anchors in attributes to stop the language converter from converting them.
		$head[$headlineCount] = "<h$level" . Html::expandAttributes( [
			'data-mw-anchor' => $anchor,
			'data-mw-fallback-anchor' => $fallbackAnchor,
		] ) . $matches['attrib'][$headlineCount] . $headline . $editlink . "</h$level>";

		$headlineCount++;
	}

	$this->setOutputType( $oldType );

	# Never ever show TOC if no headers (or suppressed)
	$suppressToc = $this->mOptions->getSuppressTOC();
	if ( !$haveTocEntries ) {
		$enoughToc = false;
	}
	$addTOCPlaceholder = false;

	if ( $isMain && !$suppressToc ) {
		// We generally output the section information via the API
		// even if there isn't "enough" of a ToC to merit showing
		// it -- but the "suppress TOC" parser option is set when
		// any sections that might be found aren't "really there"
		// (ie, JavaScript content that might have spurious === or
		// <h2>: T307691) so we will *not* set section information
		// in that case.
		$this->mOutput->setTOCData( $tocData );

		// T294950: Record a suggestion that the TOC should be shown.
		// We shouldn't be looking at ::getTOCHTML() for this because
		// that was replaced (T293513); and $tocData will contain sections
		// even if there aren't $enoughToc to show (T332243).
		// Skins are free to ignore this suggestion and implement their
		// own criteria for showing/suppressing TOC (T318186).
		if ( $enoughToc ) {
			$this->mOutput->setOutputFlag( ParserOutputFlags::SHOW_TOC );
			if ( !$this->mForceTocPosition ) {
				$addTOCPlaceholder = true;
			}
		}

		// If __NOTOC__ is used on the page (and not overridden by
		// __TOC__ or __FORCETOC__) set the NO_TOC flag to tell
		// the skin that although the section information is
		// valid, it should perhaps not be presented as a Table Of
		// Contents.
		if ( !$this->mShowToc ) {
			$this->mOutput->setOutputFlag( ParserOutputFlags::NO_TOC );
		}
	}

	# split up and insert constructed headlines
	// NOTE(review): unlike the matching regex above, this one does not allow
	// spaces before the '>' of the closing tag — confirm both always agree.
	$blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
	$i = 0;

	// build an array of document sections
	$sections = [];
	foreach ( $blocks as $block ) {
		// $head is zero-based, sections aren't.
		if ( empty( $head[$i - 1] ) ) {
			$sections[$i] = $block;
		} else {
			$sections[$i] = $head[$i - 1] . $block;
		}

		$i++;
	}

	if ( $addTOCPlaceholder ) {
		// append the TOC at the beginning
		// Top anchor now in skin
		// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset At least one element when enoughToc is true
		$sections[0] .= self::TOC_PLACEHOLDER . "\n";
	}

	$full .= implode( '', $sections );

	return $full;
}
4521
/**
 * Localize the heading text and the section numbering of every section
 * in the given TOC data. The section objects are modified in place.
 *
 * @param ?TOCData $tocData Nothing happens when null
 * @param Language $lang Language used to format the section numbers
 * @param ?ILanguageConverter $converter When given, used to convert each
 *   heading line to the converter's preferred variant
 */
private static function localizeTOC(
	?TOCData $tocData, Language $lang, ?ILanguageConverter $converter
) {
	if ( $tocData === null ) {
		// Nothing to do
		return;
	}

	foreach ( $tocData->getSections() as $section ) {
		// Localize heading
		if ( $converter ) {
			// T331316: the converter state must not be reset here. The
			// trailing `false` argument is presumably what prevents
			// convertTo() from doing so — TODO confirm.
			$variant = $converter->getPreferredVariant();
			$section->line = $converter->convertTo( $section->line, $variant, false );
		}

		// Localize numbering: format each dot-separated component on its own
		$localized = '';
		$separator = '';
		foreach ( explode( '.', $section->number ) as $component ) {
			$localized .= $separator . $lang->formatNum( $component );
			$separator = '.';
		}
		$section->number = $localized;
	}
}
4559
/**
 * Transform wikitext for saving: strip NUL characters, normalize line
 * endings, optionally run the pre-save pass (signatures, {{subst:}},
 * pipe tricks) and unstrip the result.
 *
 * @param string $text Wikitext to transform
 * @param PageReference $page Page the text belongs to
 * @param UserIdentity $user User performing the save
 * @param ParserOptions $options The pre-save pass only runs when
 *   getPreSaveTransform() is true
 * @param bool $clearState Whether to lock the parser and clear its state first
 * @return string The transformed wikitext
 */
public function preSaveTransform(
	$text,
	PageReference $page,
	UserIdentity $user,
	ParserOptions $options,
	$clearState = true
) {
	if ( $clearState ) {
		// Held in a local so the lock lasts until this method returns
		$magicScopeVariable = $this->lock();
	}
	$this->startParse( $page, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	// Strip U+0000 NULL (T159174), then normalize line endings.
	// We still normalize line endings (including trimming trailing whitespace) for
	// backwards-compatibility with other code that just calls PST, but this should already
	// be handled in TextContent subclasses
	$text = TextContent::normalizeLineEndings( str_replace( "\000", '', $text ) );

	if ( $options->getPreSaveTransform() ) {
		$text = $this->pstPass2( $text, $user );
	}

	// Unstrip, then trim trailing whitespace again, because the previous
	// steps can introduce it.
	$text = rtrim( $this->mStripState->unstripBoth( $text ) );

	$this->hookRunner->onParserPreSaveTransformComplete( $this, $text );

	# Reset
	$this->setUser( null );

	return $text;
}
4607
/**
 * Pre-save transform helper: expands {{subst:}}, signatures (~~~, ~~~~,
 * ~~~~~) and the "pipe trick" link shortcuts.
 *
 * @param string $text Wikitext to transform
 * @param UserIdentity $user User whose signature replaces the tildes
 * @return string The transformed wikitext
 */
private function pstPass2( $text, UserIdentity $user ) {
	# Note: This is the timestamp saved as hardcoded wikitext to the database, we use
	# $this->contLang here in order to give everyone the same signature and use the default one
	# rather than the one selected in each user's preferences. (see also T14815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

	$d = $this->contLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call-

	# Signatures
	if ( strpos( $text, '~~~' ) !== false ) {
		$sigText = $this->getUserSig( $user );
		// strtr() tries the longest key first, so five tildes win over four and three
		$text = strtr( $text, [
			'~~~~~' => $d,
			'~~~~' => "$sigText $d",
			'~~~' => $sigText
		] );
		# The main two signature forms used above are time-sensitive
		$this->setOutputFlag( ParserOutputFlags::USER_SIGNATURE, 'User signature detected' );
	}

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	// The double-width (fullwidth) characters are written as \u{...} escapes
	// so they cannot be confused with, or silently normalised to, their ASCII
	// look-alikes. ASCII parentheses in $p4 would form a capture group that
	// matches every "[[Foo|]]" link.
	//   U+FF08 / U+FF09 = fullwidth left / right parenthesis
	//   U+FF0C          = fullwidth comma

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page（context）|]] (double-width brackets, added in r40257)
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\u{FF08}$tc+\u{FF09})\\|]]/";
	// [[ns:page (context), context|]] (using single, double-width or Arabic comma)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |\u{FF0C}|، )$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	// Reverse pipe trick: borrow the namespace and the "(context)" or
	// ", context" part from the title of the page being saved
	$t = $this->getTitle()->getText();
	$m = [];
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	return $text;
}
4677
/**
 * Build the wikitext signature for a user.
 *
 * @param UserIdentity $user
 * @param string|false|null $nickname Nickname to sign with; false means
 *   "look up the user's 'nickname' option"
 * @param bool|null $fancySig Whether the nickname is raw signature
 *   wikitext; null means "look up the user's 'fancysig' option"
 * @return string The user's custom signature if it is enabled and
 *   validates, otherwise the default signature message linking the name
 */
public function getUserSig( UserIdentity $user, $nickname = false, $fancySig = null ) {
	$username = $user->getName();
	$userOptions = $this->userOptionsLookup;

	# If not given, retrieve from the user object.
	if ( $nickname === false ) {
		$nickname = $userOptions->getOption( $user, 'nickname' );
	}
	$fancySig ??= $userOptions->getBoolOption( $user, 'fancysig' );

	if ( $nickname === null || $nickname === '' ) {
		// Empty value results in the default signature (even when fancysig is enabled)
		$nickname = $username;
	} elseif ( mb_strlen( $nickname ) > $this->svcOptions->get( MainConfigNames::MaxSigChars ) ) {
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has overlong signature." );
	} elseif ( $fancySig !== false ) {
		# Sig. might contain markup; validate this
		$accepted = ( $this->validateSig( $nickname ) !== false );

		# New validator
		$validationMode = $this->svcOptions->get( MainConfigNames::SignatureValidation );
		if ( $accepted && $validationMode === 'disallow' ) {
			$validatorOptions = new ParserOptions(
				$this->mOptions->getUserIdentity(),
				$this->contLang
			);
			$validator = $this->signatureValidatorFactory
				->newSignatureValidator( $user, null, $validatorOptions );
			// A falsy result from validateSignature() counts as "valid" here
			$accepted = !$validator->validateSignature( $nickname );
		}

		if ( $accepted ) {
			# Validated; clean up (if needed) and return it
			return $this->cleanSig( $nickname, true );
		}

		# Failed to validate; fall back to the default
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has invalid signature." );
	}

	# Make sure nickname doesnt get a sig in a sig
	$nickname = self::cleanSigInSig( $nickname );

	# If we're still here, make it a link to the user page
	$userText = wfEscapeWikiText( $username );
	$nickText = wfEscapeWikiText( $nickname );
	$msgName = $this->userNameUtils->isTemp( $username )
		? 'signature-temp'
		: ( $user->isRegistered() ? 'signature' : 'signature-anon' );

	return wfMessage( $msgName, $userText, $nickText )->inContentLanguage()
		->page( $this->getPage() )->text();
}
4754
/**
 * Check that a user's custom signature is a well-formed XML fragment.
 *
 * @param string $text Signature text
 * @return string|false The unchanged text when it is well-formed, false otherwise
 */
public function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}

	return false;
}
4765
/**
 * Clean up signature text: force substitution of every {{...}} that is
 * not already substituted, strip nested signature tildes, and expand
 * the result through the preprocessor.
 *
 * @param string $text Signature text
 * @param bool $parsing True when called from within an ongoing parse;
 *   otherwise the parser is locked and a fresh parse is started here
 * @return string The cleaned signature, or $text unchanged when the
 *   clean-signatures option is off
 */
public function cleanSig( $text, $parsing = false ) {
	if ( !$parsing ) {
		// Held in a local so the lock lasts until this method returns
		$magicScopeVariable = $this->lock();
		$this->startParse(
			$this->mTitle,
			ParserOptions::newFromUser( RequestContext::getMain()->getUser() ),
			self::OT_PREPROCESS,
			true
		);
	}

	# Option to disable this feature
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	$subst = $this->magicWordFactory->get( 'subst' );
	// Matches every "{{" that is not already followed by the subst magic word
	$unsubstitutedBraces = '/\{\{(?!(?:' . $subst->getBaseRegex() . '))/x' . $subst->getRegexCase();
	$forcedSubst = '{{' . $subst->getSynonym( 0 );

	$text = self::cleanSigInSig( preg_replace( $unsubstitutedBraces, $forcedSubst, $text ) );

	$dom = $this->preprocessToDom( $text );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom );

	return $parsing ? $text : $this->mStripState->unstripBoth( $text );
}
4811
/**
 * Strip signature markers from text, so that a signature cannot itself
 * contain a signature. The first run of three to five tildes found at
 * each position is removed.
 *
 * @param string $text
 * @return string Text with the tilde runs removed
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
4823
/**
 * Replace the table of contents placeholder in the text with the given
 * TOC markup.
 *
 * @param string $text Text searched with TOC_PLACEHOLDER_REGEX
 * @param string $toc Replacement; escaped first so that it is inserted
 *   literally rather than interpreted as a regex replacement pattern
 * @return string
 */
public static function replaceTableOfContentsMarker( $text, $toc ) {
	$literalToc = StringUtils::escapeRegexReplacement( $toc );

	return preg_replace( self::TOC_PLACEHOLDER_REGEX, $literalToc, $text );
}
4844
/**
 * Set up the parser for a parse driven from outside this class, and
 * optionally pin the revision ID.
 *
 * @param ?PageReference $page
 * @param ParserOptions $options
 * @param int $outputType One of the OT_* constants (presumably; passed
 *   straight on to startParse())
 * @param bool $clearState
 * @param int|null $revId Stored as the current revision ID unless null
 */
public function startExternalParse( ?PageReference $page, ParserOptions $options,
	$outputType, $clearState = true, $revId = null
) {
	$this->startParse( $page, $options, $outputType, $clearState );

	if ( $revId === null ) {
		return;
	}
	$this->mRevisionId = $revId;
}
4864
/**
 * Common set-up for a parse: store the page, the options and the output
 * type (in that order), then optionally clear the parser state.
 *
 * @param ?PageReference $page
 * @param ParserOptions $options
 * @param int $outputType Passed to setOutputType()
 * @param bool $clearState Whether to call clearState() afterwards
 */
private function startParse( ?PageReference $page, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->setPage( $page );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
4881
/**
 * Preprocess the text of an interface message.
 *
 * Re-entrant calls are not processed: while one call is running, any
 * nested call returns its input unchanged.
 *
 * @param string $text Message text
 * @param ParserOptions $options
 * @param ?PageReference $page Context page; defaults to the parser's
 *   current title
 * @return string The preprocessed text, or $text itself for a nested call
 */
public function transformMsg( $text, ParserOptions $options, ?PageReference $page = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	try {
		return $this->preprocess( $text, $page ?? $this->mTitle, $options );
	} finally {
		// Always release the guard. If preprocess() threw and the flag stayed
		// set, every later call in this process would return its input
		// untransformed.
		$executing = false;
	}
}
4905
/**
 * Register a callback for a tag name and add the tag to the strip list.
 *
 * The tag name is lowercased before use.
 *
 * @param string $tag Tag name; must not contain '<', '>', CR or LF
 * @param callable $callback
 * @return callable|null The callback previously registered for this tag, if any
 * @throws InvalidArgumentException If the tag name contains a forbidden character
 */
public function setHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new InvalidArgumentException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}
	$oldVal = $this->mTagHooks[$tag] ?? null;
	$this->mTagHooks[$tag] = $callback;
	# Strict comparison is required: loosely compared, numeric strings are equal
	# when their numeric values match ('1e3' == '1000'), which would keep a
	# distinct tag out of the strip list.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $oldVal;
}
4938
/**
 * Forget all registered tag hooks and empty the strip list.
 */
public function clearTagHooks() {
	$this->mTagHooks = $this->mStripList = [];
}
4947
/**
 * Register a parser function callback and index all synonyms of its magic word.
 *
 * @param string $id Magic word ID
 * @param callable $callback
 * @param int $flags Bitfield of self::SFH_* constants
 * @return callable|null The callback previously registered under $id, if any
 */
public function setFunctionHook( $id, callable $callback, $flags = 0 ) {
	$previous = $this->mFunctionHooks[$id][0] ?? null;
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Populate the synonym lookup table
	$magicWord = $this->magicWordFactory->get( $id );
	$names = $magicWord->getSynonyms();
	$caseIndex = intval( $magicWord->isCaseSensitive() );
	$withHash = !( $flags & self::SFH_NO_HASH );

	foreach ( $names as $name ) {
		# Case-insensitive words are stored lowercased
		if ( $caseIndex === 0 ) {
			$name = $this->contLang->lc( $name );
		}
		# Prefix with a hash unless SFH_NO_HASH was given
		if ( $withHash ) {
			$name = '#' . $name;
		}
		# Drop one trailing colon
		if ( substr( $name, -1 ) === ':' ) {
			$name = substr( $name, 0, -1 );
		}
		$this->mFunctionSynonyms[$caseIndex][$name] = $id;
	}

	return $previous;
}
5018
/**
 * List the IDs under which function hooks are registered.
 *
 * @return array Keys of mFunctionHooks
 */
public function getFunctionHooks() {
	$hookIds = array_keys( $this->mFunctionHooks );

	return $hookIds;
}
5028
/**
 * Replace link placeholders in $text, modifying it in place.
 *
 * @param string &$text
 */
public function replaceLinkHolders( &$text ) {
	$this->mLinkHolders->replace( $text );
}
5039
/**
 * Replace link placeholders in $text through mLinkHolders, in place.
 *
 * @param string &$text
 */
private function replaceLinkHoldersPrivate( &$text ) {
	$holders = $this->mLinkHolders;
	$holders->replace( $text );
}
5049
/**
 * Return $text with link placeholders replaced by mLinkHolders->replaceText().
 *
 * @param string $text
 * @return string
 */
private function replaceLinkHoldersText( $text ) {
	$holders = $this->mLinkHolders;

	return $holders->replaceText( $text );
}
5060
/**
 * Render an image gallery from line-oriented text.
 *
 * Each non-empty line has the form "Title|param|...|caption". The title is
 * resolved in the file namespace and the remaining pipe-separated pieces are
 * matched against the alt/link magic words plus whatever parameters the
 * file's media handler declares. Anything unrecognised becomes the caption.
 *
 * @param string $text Gallery body, one image per line
 * @param array $params Gallery attributes; the keys read here are 'mode',
 *   'showfilename', 'caption', 'perrow', 'widths' and 'heights'. The full array
 *   is also passed to Sanitizer::validateTagAttributes() and
 *   setAdditionalOptions().
 * @return string HTML from the gallery's toHTML(), after the
 *   AfterParserFetchFileAndTitle hook has run
 */
5075 public function renderImageGallery( $text, array $params ) {
5076 $mode = false;
5077 if ( isset( $params['mode'] ) ) {
5078 $mode = $params['mode'];
5079 }
5080
5081 try {
5082 $ig = ImageGalleryBase::factory( $mode );
5083 } catch ( ImageGalleryClassNotFoundException $e ) {
5084 // If invalid type set, fallback to default.
5085 $ig = ImageGalleryBase::factory( false );
5086 }
5087
5088 $ig->setContextTitle( $this->getTitle() );
5089 $ig->setShowBytes( false );
5090 $ig->setShowDimensions( false );
 // Initial default only; the 'showfilename' check below sets this again.
5091 $ig->setShowFilename( false );
5092 $ig->setParser( $this );
5093 $ig->setHideBadImages();
5094 $ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'ul' ) );
5095
5096 if ( isset( $params['showfilename'] ) ) {
5097 $ig->setShowFilename( true );
5098 } else {
5099 $ig->setShowFilename( false );
5100 }
5101 if ( isset( $params['caption'] ) ) {
5102 // NOTE: We aren't passing a frame here or below. Frame info
5103 // is currently opaque to Parsoid, which acts on OT_PREPROCESS.
5104 // See T107332#4030581
5105 $caption = $this->recursiveTagParse( $params['caption'] );
5106 $ig->setCaptionHtml( $caption );
5107 }
5108 if ( isset( $params['perrow'] ) ) {
5109 $ig->setPerRow( $params['perrow'] );
5110 }
5111 if ( isset( $params['widths'] ) ) {
5112 $ig->setWidths( $params['widths'] );
5113 }
5114 if ( isset( $params['heights'] ) ) {
5115 $ig->setHeights( $params['heights'] );
5116 }
5117 $ig->setAdditionalOptions( $params );
5118
5119 $enableLegacyMediaDOM = $this->svcOptions->get( MainConfigNames::ParserEnableLegacyMediaDOM );
5120
5121 $lines = StringUtils::explode( "\n", $text );
5122 foreach ( $lines as $line ) {
5123 # match lines like these:
5124 # Image:someimage.jpg|This is some image
5125 $matches = [];
5126 preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
5127 # Skip empty lines
5128 if ( count( $matches ) == 0 ) {
5129 continue;
5130 }
5131
 // Percent-decode the title part when the line contains a '%' anywhere.
5132 if ( strpos( $matches[0], '%' ) !== false ) {
5133 $matches[1] = rawurldecode( $matches[1] );
5134 }
5135 $title = Title::newFromText( $matches[1], NS_FILE );
5136 if ( $title === null ) {
5137 # Bogus title. Ignore these so we don't bomb out later.
5138 continue;
5139 }
5140
5141 # We need to get what handler the file uses, to figure out parameters.
5142 # Note, a hook can override the file name, and chose an entirely different
5143 # file (which potentially could be of a different type and have different handler).
5144 $options = [];
5145 $descQuery = false;
5146 $this->hookRunner->onBeforeParserFetchFileAndTitle(
5147 // @phan-suppress-next-line PhanTypeMismatchArgument Type mismatch on pass-by-ref args
5148 $this, $title, $options, $descQuery
5149 );
5150 # Don't register it now, as TraditionalImageGallery does that later.
5151 $file = $this->fetchFileNoRegister( $title, $options );
5152 $handler = $file ? $file->getHandler() : false;
5153
 // Magic word ID => parameter name. The two 'gallery-internal-*' entries
 // are handled by this method; any others come from the handler.
5154 $paramMap = [
5155 'img_alt' => 'gallery-internal-alt',
5156 'img_link' => 'gallery-internal-link',
5157 ];
5158 if ( $handler ) {
5159 $paramMap += $handler->getParamMap();
5160 // We don't want people to specify per-image widths.
5161 // Additionally the width parameter would need special casing anyhow.
5162 unset( $paramMap['img_width'] );
5163 }
5164
5165 $mwArray = $this->magicWordFactory->newArray( array_keys( $paramMap ) );
5166
5167 $label = '';
5168 $alt = null;
5169 $handlerOptions = [];
5170 $imageOptions = [];
5171 $hasAlt = false;
5172
5173 if ( isset( $matches[3] ) ) {
5174 // look for an |alt= definition while trying not to break existing
5175 // captions with multiple pipes (|) in it, until a more sensible grammar
5176 // is defined for images in galleries
5177
5178 // FIXME: Doing recursiveTagParse at this stage, and the trim before
5179 // splitting on '|' is a bit odd, and different from makeImage.
5180 $matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
5181 // Protect LanguageConverter markup
5182 $parameterMatches = StringUtils::delimiterExplode(
5183 '-{', '}-',
5184 '|',
5185 $matches[3],
5186 true /* nested */
5187 );
5188
5189 foreach ( $parameterMatches as $parameterMatch ) {
5190 [ $magicName, $match ] = $mwArray->matchVariableStartToEnd( $parameterMatch );
5191 if ( !$magicName ) {
5192 // Last pipe wins.
5193 $label = $parameterMatch;
5194 continue;
5195 }
5196
5197 $paramName = $paramMap[$magicName];
5198 switch ( $paramName ) {
5199 case 'gallery-internal-alt':
5200 $hasAlt = true;
5201 $alt = $this->stripAltText( $match, false );
5202 break;
5203 case 'gallery-internal-link':
5204 $linkValue = $this->stripAltText( $match, false );
5205 if ( preg_match( '/^-{R\|(.*)}-$/', $linkValue ) ) {
5206 // Result of LanguageConverter::markNoConversion
5207 // invoked on an external link.
 // Drop the 4-character "-{R|" prefix and the 2-character "}-" suffix.
5208 $linkValue = substr( $linkValue, 4, -2 );
5209 }
5210 [ $type, $target ] = $this->parseLinkParameter( $linkValue );
5211 if ( $type ) {
5212 if ( $type === 'no-link' ) {
5213 $target = true;
5214 }
5215 $imageOptions[$type] = $target;
5216 }
5217 break;
5218 default:
5219 // Must be a handler specific parameter.
 // Only reachable with a handler: other names enter $paramMap
 // solely inside the `if ( $handler )` branch above.
5220 if ( $handler->validateParam( $paramName, $match ) ) {
5221 $handlerOptions[$paramName] = $match;
5222 } else {
5223 // Guess not, consider it as caption.
5224 $this->logger->debug(
5225 "$parameterMatch failed parameter validation" );
5226 $label = $parameterMatch;
5227 }
5228 }
5229 }
5230 }
5231
5232 // Match makeImage when !$hasVisibleCaption
5233 if ( !$hasAlt ) {
5234 if ( $label !== '' ) {
5235 $alt = $this->stripAltText( $label, false );
5236 } else {
5237 if ( $enableLegacyMediaDOM ) {
5238 $alt = $title->getText();
5239 }
5240 }
5241 }
5242 $imageOptions['title'] = $this->stripAltText( $label, false );
5243
5244 // Match makeImage which sets this unconditionally
5245 $handlerOptions['targetlang'] = $this->getTargetLanguage()->getCode();
5246
5247 $ig->add(
5248 $title, $label, $alt, '', $handlerOptions,
5249 ImageGalleryBase::LOADING_DEFAULT, $imageOptions
5250 );
5251 }
5252 $html = $ig->toHTML();
5253 $this->hookRunner->onAfterParserFetchFileAndTitle( $this, $ig, $html );
5254 return $html;
5255 }
5256
/**
 * Build (and cache per handler class) the image parameter map and the
 * matching magic word array.
 *
 * @param MediaHandler|false $handler Media handler of the file, or a falsy
 *   value when there is none
 * @return array Two elements: the map of magic word ID => [ type, name ],
 *   and the magic word array built from that map's keys
 */
private function getImageParams( $handler ) {
	$cacheKey = $handler ? get_class( $handler ) : '';

	if ( !isset( $this->mImageParams[$cacheKey] ) ) {
		# Parameters that exist independently of any handler, grouped by type
		static $builtinNames = [
			'horizAlign' => [ 'left', 'right', 'center', 'none' ],
			'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
				'bottom', 'text-bottom' ],
			'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
				'upright', 'border', 'link', 'alt', 'class' ],
		];
		# Derived once per process from the list above
		static $builtinMap;
		if ( !$builtinMap ) {
			$builtinMap = [];
			foreach ( $builtinNames as $group => $names ) {
				foreach ( $names as $name ) {
					// For grep: img_left, img_right, img_center, img_none,
					// img_baseline, img_sub, img_super, img_top, img_text_top, img_middle,
					// img_bottom, img_text_bottom,
					// img_thumbnail, img_manualthumb, img_framed, img_frameless, img_upright,
					// img_border, img_link, img_alt, img_class
					$builtinMap[ str_replace( '-', '_', "img_$name" ) ] = [ $group, $name ];
				}
			}
		}

		# Layer the handler's own parameters on top
		$combined = $builtinMap;
		if ( !$handler ) {
			// Parse the size for non-existent files. See T273013
			$combined[ 'img_width' ] = [ 'handler', 'width' ];
		} else {
			foreach ( $handler->getParamMap() as $magic => $paramName ) {
				$combined[$magic] = [ 'handler', $paramName ];
			}
		}

		$this->mImageParams[$cacheKey] = $combined;
		$this->mImageParamsMagicArray[$cacheKey] =
			$this->magicWordFactory->newArray( array_keys( $combined ) );
	}

	return [ $this->mImageParams[$cacheKey], $this->mImageParamsMagicArray[$cacheKey] ];
}
5309
/**
 * Parse image options text and hand the result to Linker::makeImageLink().
 *
 * The option text is split on '|' (LanguageConverter markup is protected).
 * Each part is matched against the magic words from getImageParams() and
 * validated; a part that does not validate becomes the caption, so the last
 * such part wins.
 *
 * @param LinkTarget $link Target of the file link; may be replaced by the
 *   value fetchFileAndTitle() returns
 * @param string $options Pipe-separated option text
 * @param object|false $holders Forwarded to stripAltText(), which calls
 *   replaceText() on it when truthy
 * @return string HTML from Linker::makeImageLink(), passed through
 *   modifyImageHtml() when the file exists
 */
5319 public function makeImage( LinkTarget $link, $options, $holders = false ) {
5320 # Check if the options text is of the form "options|alt text"
5321 # Options are:
5322 # * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang
5323 # * left no resizing, just left align. label is used for alt= only
5324 # * right same, but right aligned
5325 # * none same, but not aligned
5326 # * ___px scale to ___ pixels width, no aligning. e.g. use in taxobox
5327 # * center center the image
5328 # * framed Keep original image size, no magnify-button.
5329 # * frameless like 'thumb' but without a frame. Keeps user preferences for width
5330 # * upright reduce width for upright images, rounded to full __0 px
5331 # * border draw a 1px border around the image
5332 # * alt Text for HTML alt attribute (defaults to empty)
5333 # * class Set a class for img node
5334 # * link Set the target of the image link. Can be external, interwiki, or local
5335 # vertical-align values (no % or length right now):
5336 # * baseline
5337 # * sub
5338 # * super
5339 # * top
5340 # * text-top
5341 # * middle
5342 # * bottom
5343 # * text-bottom
5344
5345 # Protect LanguageConverter markup when splitting into parts
5346 $parts = StringUtils::delimiterExplode(
5347 '-{', '}-', '|', $options, true /* allow nesting */
5348 );
5349
5350 # Give extensions a chance to select the file revision for us
 # Note: $options is reused from here on as the hook's options array.
5351 $options = [];
5352 $descQuery = false;
5353 $title = Title::castFromLinkTarget( $link ); // hook signature compat
5354 $this->hookRunner->onBeforeParserFetchFileAndTitle(
5355 // @phan-suppress-next-line PhanTypeMismatchArgument Type mismatch on pass-by-ref args
5356 $this, $title, $options, $descQuery
5357 );
5358 # Fetch and register the file (file title may be different via hooks)
5359 [ $file, $link ] = $this->fetchFileAndTitle( $link, $options );
5360
5361 # Get parameter map
5362 $handler = $file ? $file->getHandler() : false;
5363
5364 [ $paramMap, $mwArray ] = $this->getImageParams( $handler );
5365
5366 if ( !$file ) {
5367 $this->addTrackingCategory( 'broken-file-category' );
5368 }
5369
5370 # Process the input parameters
5371 $caption = '';
5372 $params = [ 'frame' => [], 'handler' => [],
5373 'horizAlign' => [], 'vertAlign' => [] ];
5374 $seenformat = false;
5375 foreach ( $parts as $part ) {
5376 $part = trim( $part );
5377 [ $magicName, $value ] = $mwArray->matchVariableStartToEnd( $part );
 # Reset per part; a part still unvalidated at the end of the iteration
 # is taken as the caption.
5378 $validated = false;
5379 if ( isset( $paramMap[$magicName] ) ) {
5380 [ $type, $paramName ] = $paramMap[$magicName];
5381
5382 # Special case; width and height come in one variable together
5383 if ( $type === 'handler' && $paramName === 'width' ) {
5384 $parsedWidthParam = self::parseWidthParam( $value );
5385 // Parsoid applies data-(width|height) attributes to broken
5386 // media spans, for client use. See T273013
5387 $validateFunc = static function ( $name, $value ) use ( $handler ) {
5388 return $handler
5389 ? $handler->validateParam( $name, $value )
5390 : $value > 0;
5391 };
5392 if ( isset( $parsedWidthParam['width'] ) ) {
5393 $width = $parsedWidthParam['width'];
5394 if ( $validateFunc( 'width', $width ) ) {
5395 $params[$type]['width'] = $width;
5396 $validated = true;
5397 }
5398 }
5399 if ( isset( $parsedWidthParam['height'] ) ) {
5400 $height = $parsedWidthParam['height'];
5401 if ( $validateFunc( 'height', $height ) ) {
5402 $params[$type]['height'] = $height;
5403 $validated = true;
5404 }
5405 }
5406 # else no validation -- T15436
5407 } else {
5408 if ( $type === 'handler' ) {
5409 # Validate handler parameter
 # Without a handler, getImageParams() maps only 'width' to type
 # 'handler', and that case is taken by the branch above.
5410 $validated = $handler->validateParam( $paramName, $value );
5411 } else {
5412 # Validate internal parameters
5413 switch ( $paramName ) {
5414 case 'alt':
5415 case 'class':
5416 $validated = true;
5417 $value = $this->stripAltText( $value, $holders );
5418 break;
5419 case 'link':
 # parseLinkParameter() returns [ type, target ]; the type
 # replaces $paramName as the key stored under $params['frame'].
5420 [ $paramName, $value ] =
5421 $this->parseLinkParameter(
5422 $this->stripAltText( $value, $holders )
5423 );
5424 if ( $paramName ) {
5425 $validated = true;
5426 if ( $paramName === 'no-link' ) {
5427 $value = true;
5428 }
5429 }
5430 break;
5431 case 'manualthumb':
5432 # @todo FIXME: Possibly check validity here for
5433 # manualthumb? downstream behavior seems odd with
5434 # missing manual thumbs.
5435 $value = $this->stripAltText( $value, $holders );
5436 // fall through
5437 case 'frameless':
5438 case 'framed':
5439 case 'thumbnail':
5440 // use first appearing option, discard others.
5441 $validated = !$seenformat;
5442 $seenformat = true;
5443 break;
5444 default:
5445 # Most other things appear to be empty or numeric...
5446 $validated = ( $value === false || is_numeric( trim( $value ) ) );
5447 }
5448 }
5449
5450 if ( $validated ) {
5451 $params[$type][$paramName] = $value;
5452 }
5453 }
5454 }
5455 if ( !$validated ) {
5456 $caption = $part;
5457 }
5458 }
5459
5460 # Process alignment parameters
 # When several alignment options validated, the first one stored is used.
5461 if ( $params['horizAlign'] !== [] ) {
5462 $params['frame']['align'] = array_key_first( $params['horizAlign'] );
5463 }
5464 if ( $params['vertAlign'] !== [] ) {
5465 $params['frame']['valign'] = array_key_first( $params['vertAlign'] );
5466 }
5467
5468 $params['frame']['caption'] = $caption;
5469
5470 $enableLegacyMediaDOM = $this->svcOptions->get( MainConfigNames::ParserEnableLegacyMediaDOM );
5471
5472 # Will the image be presented in a frame, with the caption below?
5473 // @phan-suppress-next-line PhanImpossibleCondition
5474 $hasVisibleCaption = isset( $params['frame']['framed'] )
5475 // @phan-suppress-next-line PhanImpossibleCondition
5476 || isset( $params['frame']['thumbnail'] )
5477 // @phan-suppress-next-line PhanImpossibleCondition
5478 || isset( $params['frame']['manualthumb'] );
5479
5480 # In the old days, [[Image:Foo|text...]] would set alt text. Later it
5481 # came to also set the caption, ordinary text after the image -- which
5482 # makes no sense, because that just repeats the text multiple times in
5483 # screen readers. It *also* came to set the title attribute.
5484 # Now that we have an alt attribute, we should not set the alt text to
5485 # equal the caption: that's worse than useless, it just repeats the
5486 # text. This is the framed/thumbnail case. If there's no caption, we
5487 # use the unnamed parameter for alt text as well, just for the time be-
5488 # ing, if the unnamed param is set and the alt param is not.
5489 # For the future, we need to figure out if we want to tweak this more,
5490 # e.g., introducing a title= parameter for the title; ignoring the un-
5491 # named parameter entirely for images without a caption; adding an ex-
5492 # plicit caption= parameter and preserving the old magic unnamed para-
5493 # meter for BC; ...
5494 if ( $hasVisibleCaption ) {
5495 if (
5496 // @phan-suppress-next-line PhanImpossibleCondition
5497 $caption === '' && !isset( $params['frame']['alt'] ) &&
5498 $enableLegacyMediaDOM
5499 ) {
5500 # No caption or alt text, add the filename as the alt text so
5501 # that screen readers at least get some description of the image
5502 $params['frame']['alt'] = $link->getText();
5503 }
5504 # Do not set $params['frame']['title'] because tooltips are unnecessary
5505 # for framed images, the caption is visible
5506 } else {
5507 // @phan-suppress-next-line PhanImpossibleCondition
5508 if ( !isset( $params['frame']['alt'] ) ) {
5509 # No alt text, use the "caption" for the alt text
5510 if ( $caption !== '' ) {
5511 $params['frame']['alt'] = $this->stripAltText( $caption, $holders );
5512 } elseif ( $enableLegacyMediaDOM ) {
5513 # No caption, fall back to using the filename for the
5514 # alt text
5515 $params['frame']['alt'] = $link->getText();
5516 }
5517 }
5518 # Use the "caption" for the tooltip text
5519 $params['frame']['title'] = $this->stripAltText( $caption, $holders );
5520 }
5521 $params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();
5522
5523 // hook signature compat again, $link may have changed
5524 $title = Title::castFromLinkTarget( $link );
5525 $this->hookRunner->onParserMakeImageParams( $title, $file, $params, $this );
5526
5527 # Linker does the rest
5528 $time = $options['time'] ?? false;
5529 // @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset
5530 $ret = Linker::makeImageLink( $this, $link, $file, $params['frame'], $params['handler'],
5531 $time, $descQuery, $this->mOptions->getThumbSize() );
5532
5533 # Give the handler a chance to modify the parser object
5534 if ( $handler ) {
5535 $handler->parserTransformHook( $this, $file );
5536 }
5537 if ( $file ) {
5538 $this->modifyImageHtml( $file, $params, $ret );
5539 }
5540
5541 return $ret;
5542 }
5543
/**
 * Classify the value of an image "link" option.
 *
 * Recognised external URLs and valid titles are also recorded on the parser
 * output (addExternalLink() / addLink()).
 *
 * @param string $value
 * @return array Two elements, [ type, target ]:
 *   - [ 'no-link', false ] for the empty string
 *   - [ 'link-url', string ] for a well-formed URL with a valid protocol
 *   - [ 'link-title', Title ] when the value parses as a title
 *   - [ null, false ] otherwise
 */
private function parseLinkParameter( $value ) {
	$chars = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	$prots = $this->urlUtils->validProtocols();

	if ( $value === '' ) {
		return [ 'no-link', false ];
	}

	if ( preg_match( "/^((?i)$prots)/", $value ) ) {
		// Starts like an external link: accept it only if the whole
		// value is a well-formed URL, and never try it as a title.
		if ( !preg_match( "/^((?i)$prots)$addr$chars*$/u", $value ) ) {
			return [ null, false ];
		}
		$this->mOutput->addExternalLink( $value );
		return [ 'link-url', $value ];
	}

	// Percent-decode link arguments for consistency with wikilink
	// handling (T216003#7836261).
	//
	// The |link= option supports two formats, link=Test%22test and
	// link=[[Test%22test]], and both are decoded here.
	//
	// For the plain form the decoding is straightforward and desirable.
	//
	// For the bracketed form there is a potential for double decoding:
	// wikilink syntax has higher precedence and was already parsed as a
	// link, and $value has been through stripAltText(), which calls
	// replaceLinkHoldersText(). So the text received here has already
	// been percent-decoded once.
	//
	// The problematic case would be %25 in the title, since it decodes
	// to %, which could combine with the characters that follow. But %
	// is not a valid title character, so such text would not have parsed
	// as a link and still contains the encoded %25 at this point.
	//
	// Hence double decoding is not an issue. See the test
	// "Should not double decode the link option".
	if ( strpos( $value, '%' ) !== false ) {
		$value = rawurldecode( $value );
	}

	$linkTitle = Title::newFromText( $value );
	if ( !$linkTitle ) {
		return [ null, false ];
	}
	$this->mOutput->addLink( $linkTitle );

	return [ 'link-title', $linkTitle ];
}
5614
/**
 * Run the ParserModifyImageHTML hook so handlers can alter the image HTML.
 *
 * @param File $file
 * @param array $params
 * @param string &$html Passed by reference to the hook
 */
public function modifyImageHtml( File $file, array $params, string &$html ) {
	$runner = $this->hookRunner;
	$runner->onParserModifyImageHTML( $this, $file, $params, $html );
}
5625
/**
 * Reduce caption text to a plain string usable in an attribute.
 *
 * Steps, in order: replace link holders, expand strip markers, escape the
 * '&' of legacy semicolon-less named entities, then strip all tags.
 *
 * @param string $caption
 * @param object|false $holders When truthy, its replaceText() is used in
 *   place of the parser's own replaceLinkHoldersText()
 * @return string
 */
5631 private function stripAltText( $caption, $holders ) {
5632 # Strip bad stuff out of the title (tooltip). We can't just use
5633 # replaceLinkHoldersText() here, because if this function is called
5634 # from handleInternalLinks2(), mLinkHolders won't be up-to-date.
5635 if ( $holders ) {
5636 $tooltip = $holders->replaceText( $caption );
5637 } else {
5638 $tooltip = $this->replaceLinkHoldersText( $caption );
5639 }
5640
5641 # make sure there are no placeholders in thumbnail attributes
5642 # that are later expanded to html- so expand them now and
5643 # remove the tags
5644 $tooltip = $this->mStripState->unstripBoth( $tooltip );
5645 # Compatibility hack! In HTML certain entity references not terminated
5646 # by a semicolon are decoded (but not if we're in an attribute; that's
5647 # how link URLs get away without properly escaping & in queries).
5648 # But wikitext has always required semicolon-termination of entities,
5649 # so encode & where needed to avoid decode of semicolon-less entities.
5650 # See T209236 and
5651 # https://www.w3.org/TR/html5/syntax.html#named-character-references
5652 # T210437 discusses moving this workaround to Sanitizer::stripAllTags.
 # The pattern only consumes the '&'; the entity name is checked in a
 # lookahead, so the name itself stays in the text after '&amp;'.
5653 $tooltip = preg_replace( "/
5654 & # 1. entity prefix
5655 (?= # 2. followed by:
5656 (?: # a. one of the legacy semicolon-less named entities
5657 A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
5658 C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
5659 GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
5660 O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
5661 U(?:acute|circ|grave|uml)|Yacute|
5662 a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
5663 c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
5664 divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
5665 frac(?:1(?:2|4)|34)|
5666 gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
5667 i(?:acute|circ|excl|grave|quest|uml)|laquo|
5668 lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
5669 m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
5670 not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
5671 o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
5672 p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
5673 s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
5674 u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
5675 )
5676 (?:[^;]|$)) # b. and not followed by a semicolon
5677 # S = study, for efficiency
5678 /Sx", '&amp;', $tooltip );
5679 $tooltip = Sanitizer::stripAllTags( $tooltip );
5680
5681 return $tooltip;
5682 }
5683
/**
 * Expand variables in $text and then expand its strip markers.
 *
 * @deprecated since 1.35; a deprecation notice is emitted on every call
 * @param string &$text Updated in place with each intermediate result
 * @param PPFrame|false $frame Forwarded to replaceVariables()
 * @return string The final value of $text
 */
public function attributeStripCallback( &$text, $frame = false ) {
	wfDeprecated( __METHOD__, '1.35' );

	$text = $this->replaceVariables( $text, $frame );

	return $text = $this->mStripState->unstripBoth( $text );
}
5699
/**
 * List the tag names that have a hook registered.
 *
 * @return array Keys of mTagHooks
 */
public function getTags(): array {
	$tagNames = array_keys( $this->mTagHooks );

	return $tagNames;
}
5709
/**
 * Expose the function synonym table as populated by setFunctionHook().
 *
 * @return array Indexed first by case-sensitivity (0 or 1), then by synonym
 */
public function getFunctionSynonyms() {
	$synonymTable = $this->mFunctionSynonyms;

	return $synonymTable;
}
5717
/**
 * Return the valid URL protocols as reported by urlUtils->validProtocols().
 *
 * @return string
 */
public function getUrlProtocols() {
	$urlUtils = $this->urlUtils;

	return $urlUtils->validProtocols();
}
5725
/**
 * Shared implementation of getSection() and replaceSection().
 *
 * The text is preprocessed to a DOM whose top-level <h> nodes mark section
 * boundaries. A section runs from its heading up to the next heading of the
 * same or a higher level, so nested subsections are included.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Section index, optionally prefixed with flags
 *   separated by '-' (a 'T' flag preprocesses for inclusion). A non-numeric
 *   ID is coerced to 0.
 * @param string $mode 'get' to return the section, 'replace' to return the
 *   whole text with the section replaced
 * @param string $newText Replacement text in 'replace' mode; the value
 *   returned for a missing section in 'get' mode
 * @param PageReference|null $page
 * @return string
 */
private function extractSections( $text, $sectionId, $mode, $newText, ?PageReference $page = null ) {
	$magicScopeVariable = $this->lock();
	$this->startParse(
		$page,
		ParserOptions::newFromUser( RequestContext::getMain()->getUser() ),
		self::OT_PLAIN,
		true
	);
	$outText = '';
	$frame = $this->getPreprocessor()->newFrame();

	$wantGet = ( $mode === 'get' );
	$wantReplace = ( $mode === 'replace' );

	# Split the ID into leading flags and the trailing index.
	// The section ID may either be a magic string such as 'new' (which should be treated as 0),
	// or a numbered section ID in the format of "T-<section index>".
	// Explicitly coerce the section index into a number accordingly. (T323373)
	$idParts = explode( '-', $sectionId );
	$sectionIndex = (int)array_pop( $idParts );
	$flags = in_array( 'T', $idParts, true ) ? Preprocessor::DOM_FOR_INCLUSION : 0;

	# An empty document contains only sections 0 and T-0
	if ( strval( $text ) === '' ) {
		if ( $sectionIndex === 0 ) {
			return $wantGet ? '' : $newText;
		}
		return $wantGet ? $newText : $text;
	}

	# Preprocess the text
	$root = $this->preprocessToDom( $text, $flags );

	# <h> nodes indicate section breaks. They only occur at the top level,
	# so walking the root's children finds all of them.
	$node = $root->getFirstChild();

	# Locate the heading that opens the target section
	if ( $sectionIndex === 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		for ( ; $node; $node = $node->getNextSibling() ) {
			if ( $node->getName() === 'h' ) {
				$heading = $node->splitHeading();
				if ( $heading['i'] == $sectionIndex ) {
					$targetLevel = $heading['level'];
					break;
				}
			}
			# In replace mode everything before the section is copied through
			if ( $wantReplace ) {
				$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
		}
	}

	if ( !$node ) {
		# Section not found
		return $wantGet ? $newText : $text;
	}

	# Walk to the end of the section, including nested sections
	for ( ; $node; $node = $node->getNextSibling() ) {
		if ( $node->getName() === 'h' ) {
			$heading = $node->splitHeading();
			$curLevel = $heading['level'];
			// @phan-suppress-next-line PhanPossiblyUndeclaredVariable False positive
			if ( $heading['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
				break;
			}
		}
		if ( $wantGet ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	# Write out the remainder (in replace mode only)
	if ( $wantReplace ) {
		# Trailing whitespace in $newText is conventionally stripped by the
		# editor, so two newlines are appended to restore the paragraph gap.
		# Nothing is appended when there is no replacement text.
		if ( $newText != "" ) {
			$outText .= $newText . "\n\n";
		}

		for ( ; $node; $node = $node->getNextSibling() ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	# Re-insert stripped tags
	return rtrim( $this->mStripState->unstripBoth( $outText ) );
}
5871
/**
 * Return the wikitext of one section, via extractSections() in 'get' mode.
 *
 * @param string $text
 * @param string|int $sectionId
 * @param string $defaultText Returned when the section does not exist
 * @return string
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	$sectionText = $this->extractSections( $text, $sectionId, 'get', $defaultText );

	return $sectionText;
}
5890
/**
 * Return $oldText with one section replaced, via extractSections() in
 * 'replace' mode.
 *
 * @param string $oldText
 * @param string|int $sectionId
 * @param string $newText
 * @return string
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$updatedText = $this->extractSections( $oldText, $sectionId, 'replace', $newText );

	return $updatedText;
}
5907
/**
 * Split wikitext into a flat list of sections.
 *
 * Each entry is an array with the keys 'index', 'level', 'offset' (byte
 * offset of the section start within the recovered text), 'heading' and
 * 'text' (heading included). The first entry is always the lead section,
 * with index 0, level 0 and an empty heading.
 *
 * @param string $text
 * @return array[]
 */
public function getFlatSectionInfo( $text ) {
	$magicScopeVariable = $this->lock();
	$this->startParse(
		null,
		ParserOptions::newFromUser( RequestContext::getMain()->getUser() ),
		self::OT_PLAIN,
		true
	);
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $text, 0 );

	$sections = [];
	$offset = 0;
	$current = [
		'index' => 0,
		'level' => 0,
		'offset' => 0,
		'heading' => '',
		'text' => ''
	];

	for ( $node = $root->getFirstChild(); $node; $node = $node->getNextSibling() ) {
		$original = $frame->expand( $node, PPFrame::RECOVER_ORIG );

		if ( $node->getName() !== 'h' ) {
			$current['text'] .= $original;
		} else {
			# A heading closes the running section and opens a new one
			$bits = $node->splitHeading();
			$sections[] = $current;
			$current = [
				'index' => $bits['i'],
				'level' => $bits['level'],
				'offset' => $offset,
				'heading' => $original,
				'text' => $original
			];
		}

		$offset += strlen( $original );
	}

	# Flush the last open section
	$sections[] = $current;

	return $sections;
}
5979
/**
 * Return the revision ID recorded in mRevisionId.
 *
 * @return int|null
 */
public function getRevisionId() {
	$revId = $this->mRevisionId;

	return $revId;
}
5994
/**
 * Get the revision record being parsed, caching it in mRevisionRecordObject.
 *
 * @return RevisionRecord|null Null when no suitable revision is available
 */
public function getRevisionRecordObject() {
	$cached = $this->mRevisionRecordObject;
	if ( $cached ) {
		return $cached;
	}

	// NOTE: the callback is consulted even when mRevisionId is null, which
	// is useful when parsing a revision that has not been saved yet. If it
	// hands back a saved revision while we are in preview mode, that result
	// is discarded below.
	// NOTE: the callback may also inject an OLD revision that was already
	// loaded, so "current" is a bit of a misnomer; it cannot simply be
	// skipped when mRevisionId is set.
	$callback = $this->mOptions->getCurrentRevisionRecordCallback();
	$rev = call_user_func( $callback, $this->getTitle(), $this );

	if ( !$rev ) {
		// The callback signals a missing revision with `false` (see for
		// example Parser::statelessFetchRevisionRecord(), the default
		// callback), whereas this API expects `null`. (T251952)
		return null;
	}

	$wantedId = $this->mRevisionId;

	if ( $wantedId === null && $rev->getId() ) {
		// Preview mode (mRevisionId is null), yet the callback returned an
		// existing revision. It is probably the page's current revision,
		// which is not what we want here, so ignore it. The callback still
		// has to be called so an unsaved revision can be injected, e.g.
		// for self-transclusion previews.
		return null;
	}

	// When parsing a new revision the callback should already have been set
	// to force the object, and it should match mRevisionId. If it does not,
	// fetch by mRevisionId instead.
	if ( $wantedId && $rev->getId() != $wantedId ) {
		$rev = MediaWikiServices::getInstance()
			->getRevisionLookup()
			->getRevisionById( $wantedId );
	}

	$this->mRevisionRecordObject = $rev;

	return $this->mRevisionRecordObject;
}
6049
/**
 * Get the revision timestamp adjusted to the site-default timezone, caching
 * it in mRevisionTimestamp.
 *
 * The unadjusted timestamp is also recorded on the parser output.
 *
 * @return string
 */
public function getRevisionTimestamp() {
	if ( $this->mRevisionTimestamp === null ) {
		# Prefer the revision's own timestamp, else the one from the options
		$rev = $this->getRevisionRecordObject();
		if ( $rev && $rev->getTimestamp() ) {
			$timestamp = $rev->getTimestamp();
		} else {
			$timestamp = $this->mOptions->getTimestamp();
		}
		$this->mOutput->setRevisionTimestampUsed( $timestamp ); // unadjusted time zone

		# The cryptic '' timezone argument selects the site-default offset
		# rather than the user's setting. The value ends up in the parser
		# cache, is served to other users and may be used inside links, so
		# it has to be the same for every visitor.
		$this->mRevisionTimestamp = $this->contLang->userAdjust( $timestamp, '' );
	}

	return $this->mRevisionTimestamp;
}
6077
/**
 * Get the name of the revision's author, caching it in mRevisionUser.
 *
 * @return string|null Null when there is no revision user and the parse is
 *   neither in OT_WIKI mode nor a preview
 */
public function getRevisionUser(): ?string {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$rev = $this->getRevisionRecordObject();

	# When this is subst'ed the revision id is blank, so the current user's
	# name is used instead.
	if ( $rev && $rev->getUser() ) {
		$this->mRevisionUser = $rev->getUser()->getName();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		$this->mRevisionUser = $this->getUserIdentity()->getName();
	}
	# Otherwise mRevisionUser is deliberately left as null

	return $this->mRevisionUser;
}
6101
/**
 * Get the size of the revision being parsed.
 *
 * The result is cached in $this->mRevisionSize once it is non-null.
 *
 * @return mixed The revision record's getSize() value, or
 *   $this->mInputSize when no revision record is available.
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize !== null ) {
		return $this->mRevisionSize;
	}

	# If this variable is subst: the revision id will be blank, so fall
	# back to the parser input size, because the substitution itself
	# will change the size.
	$record = $this->getRevisionRecordObject();
	$this->mRevisionSize = $record ? $record->getSize() : $this->mInputSize;

	return $this->mRevisionSize;
}
6123
/**
 * Accessor for the 'defaultsort' page property.
 *
 * Emits a deprecation warning (deprecated since 1.38) on every call.
 *
 * @return mixed The 'defaultsort' page property of the current
 *   ParserOutput, or '' when that property is null/unset.
 */
public function getDefaultSort() {
	wfDeprecated( __METHOD__, '1.38' );
	$sortKey = $this->mOutput->getPageProperty( 'defaultsort' );
	return $sortKey ?? '';
}
6141
/**
 * Turn already-stripped heading text into a normalized section name.
 *
 * Applies, in this order: Sanitizer::normalizeSectionNameWhitespace(),
 * Sanitizer::decodeCharReferences(), then self::normalizeSectionName().
 *
 * @param string $text Heading text with wikitext markup already removed
 * @return string
 */
private static function getSectionNameFromStrippedText( $text ) {
	return self::normalizeSectionName(
		Sanitizer::decodeCharReferences(
			Sanitizer::normalizeSectionNameWhitespace( $text )
		)
	);
}
6148
/**
 * Build a link fragment ("#...") for a section name.
 *
 * @param string $sectionName
 * @return string '#' followed by Sanitizer::escapeIdForLink( $sectionName )
 */
private static function makeAnchor( $sectionName ) {
	$fragment = Sanitizer::escapeIdForLink( $sectionName );
	return "#$fragment";
}
6152
/**
 * Build a link fragment ("#...") for a section name, using the legacy
 * encoding when the second entry of the FragmentMode setting is 'legacy'.
 *
 * @param string $sectionName
 * @return string
 */
private function makeLegacyAnchor( $sectionName ) {
	$modes = $this->svcOptions->get( MainConfigNames::FragmentMode );
	$useLegacy = ( $modes[1] ?? null ) === 'legacy';

	// ForAttribute() and ForLink() are the same for legacy encoding
	$id = $useLegacy
		? Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK )
		: Sanitizer::escapeIdForLink( $sectionName );

	return '#' . $id;
}
6164
/**
 * Try to guess the section anchor name based on a wikitext fragment,
 * presumably extracted from a heading.
 *
 * @param string $text Wikitext fragment
 * @return string Anchor, including the leading '#'
 */
public function guessSectionNameFromWikiText( $text ) {
	# Wikitext links break the anchor, so strip the markup first
	$stripped = $this->stripSectionName( $text );
	return self::makeAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6180
/**
 * Same as guessSectionNameFromWikiText(), but produces legacy anchors
 * instead, if possible.
 *
 * @param string $text Wikitext fragment
 * @return string Anchor, including the leading '#'
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Wikitext links break the anchor, so strip the markup first
	$stripped = $this->stripSectionName( $text );
	return $this->makeLegacyAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6197
/**
 * Like guessSectionNameFromWikiText(), but takes already-stripped text
 * as input.
 *
 * @param string $text Section name, with wikitext markup already removed
 * @return string Anchor, including the leading '#'
 */
public static function guessSectionNameFromStrippedText( $text ) {
	return self::makeAnchor( self::getSectionNameFromStrippedText( $text ) );
}
6208
/**
 * Apply title-fragment normalization to a section name.
 *
 * The text is passed to the title parser as the fragment of an otherwise
 * empty title ("#$text") and the parsed 'fragment' part is returned.
 *
 * @param string $text
 * @return string The normalized fragment, or $text unchanged when the
 *   title parser throws MalformedTitleException.
 */
private static function normalizeSectionName( $text ) {
	# T90902: ensure the same normalization is applied for IDs as to links
	$titleParser = MediaWikiServices::getInstance()->getTitleParser();
	'@phan-var MediaWikiTitleCodec $titleParser';
	try {
		return $titleParser->splitTitleString( "#$text" )['fragment'];
	} catch ( MalformedTitleException $e ) {
		return $text;
	}
}
6228
/**
 * Strips a text string of wikitext for use in a section anchor.
 *
 * Steps, in order: internal link markup is reduced to its label (or
 * target when there is no label), external links with a label are
 * reduced to the label, quote markup is processed by doQuotes(), and
 * finally everything between '<' and '>' is removed.
 *
 * @param string $text Wikitext of the heading
 * @return string
 */
public function stripSectionName( $text ) {
	# Strip internal link markup: piped links first, then plain ones
	$pipedLinkPattern = '/\[\[:?([^[|]+)\|([^[]+)\]\]/';
	$stripped = preg_replace( $pipedLinkPattern, '$2', $text );
	$plainLinkPattern = '/\[\[:?([^[]+)\|?\]\]/';
	$stripped = preg_replace( $plainLinkPattern, '$1', $stripped );

	# Strip external link markup
	# @todo FIXME: Not tolerant to blank link text
	# I.E. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$externalLinkPattern =
		'/\[(?i:' . $this->urlUtils->validProtocols() . ')([^ ]+?) ([^[]+)\]/';
	$stripped = preg_replace( $externalLinkPattern, '$2', $stripped );

	# Parse wikitext quotes (italics & bold)
	$stripped = $this->doQuotes( $stripped );

	# Strip HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $stripped );
}
6263
/**
 * Call a callback function on all regions of the given text that are not
 * inside strip markers, and replace those regions with the return value
 * of the callback.
 *
 * Strip markers (text from self::MARKER_PREFIX up to and including the
 * next self::MARKER_SUFFIX) are copied to the output untouched. A marker
 * prefix that has no matching suffix is copied verbatim together with
 * everything after it.
 *
 * Note that the callback is invoked with an empty string when a marker
 * starts exactly at the current position (including at the start of the
 * text), but it is not invoked for an empty remainder after a final marker.
 *
 * @param string $s Text to process
 * @param callable $callback Called as $callback( string $region ); its
 *   return value is appended to the output in place of the region
 * @return string
 */
public function markerSkipCallback( $s, callable $callback ) {
	$out = '';
	// Both lengths are loop-invariant, so compute them once.
	$length = strlen( $s );
	$suffixLength = strlen( self::MARKER_SUFFIX );

	$i = 0;
	while ( $i < $length ) {
		$markerStart = strpos( $s, self::MARKER_PREFIX, $i );
		if ( $markerStart === false ) {
			// No further markers: the remainder is ordinary text.
			$out .= $callback( substr( $s, $i ) );
			break;
		}

		// Ordinary text in front of the marker (possibly empty).
		$out .= $callback( substr( $s, $i, $markerStart - $i ) );

		$markerEnd = strpos( $s, self::MARKER_SUFFIX, $markerStart );
		if ( $markerEnd === false ) {
			// Unterminated marker: keep the rest of the text as it is.
			$out .= substr( $s, $markerStart );
			break;
		}

		// Copy the complete marker, suffix included, and continue after it.
		$markerEnd += $suffixLength;
		$out .= substr( $s, $markerStart, $markerEnd - $markerStart );
		$i = $markerEnd;
	}

	return $out;
}
6305
/**
 * Remove any strip markers found in the given text.
 *
 * Delegates to the killMarkers() method of the current strip state.
 *
 * @param string $text
 * @return string
 */
public function killMarkers( $text ) {
	$stripState = $this->mStripState;
	return $stripState->killMarkers( $text );
}
6316
/**
 * Parse a width param of an image link, like 300px or 200x300px.
 *
 * @param string $value
 * @param bool $parseHeight Whether the "WIDTHxHEIGHT" form is accepted
 * @return array Empty when $value is '' or matches neither form;
 *   otherwise it has a 'width' key and, for the "WIDTHxHEIGHT" form,
 *   also a 'height' key. The values are integers (intval() of the digits,
 *   so a missing number yields 0).
 */
public static function parseWidthParam( $value, $parseHeight = true ) {
	if ( $value === '' ) {
		return [];
	}

	# (T15500) In both cases (width/height and width only),
	# permit trailing "px" for backward compatibility.
	$match = [];
	if ( $parseHeight && preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $match ) ) {
		return [
			'width' => intval( $match[1] ),
			'height' => intval( $match[2] ),
		];
	}

	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		# intval() stops at the first non-digit, so "300px" yields 300
		return [ 'width' => intval( $value ) ];
	}

	return [];
}
6346
/**
 * Lock the current instance of the parser.
 *
 * The lock is represented by $this->mInParse holding the backtrace of
 * the code that took it. It is released (mInParse reset to false) by the
 * callback wrapped in the returned ScopedCallback.
 *
 * @return ScopedCallback Object whose wrapped callback releases the lock
 *   (presumably run when the object goes out of scope; see ScopedCallback)
 * @throws LogicException If the parser is already locked
 */
protected function lock() {
	$currentOwner = $this->mInParse;
	if ( $currentOwner ) {
		throw new LogicException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively? Lock is held by: " . $currentOwner );
	}

	// Remember who took the lock: if some code tries locking again, the
	// owner's backtrace can be shown in the error for easier debugging.
	$this->mInParse = ( new RuntimeException )->getTraceAsString();

	return new ScopedCallback( function () {
		$this->mInParse = false;
	} );
}
6372
/**
 * Will entry points such as parse() throw an exception due to the parser
 * already being active?
 *
 * @return bool True when $this->mInParse is set (truthy)
 */
public function isLocked() {
	return $this->mInParse ? true : false;
}
6383
/**
 * Strip the outer <p></p> tag from the HTML source of a single paragraph.
 *
 * The HTML is returned unchanged when it is not wrapped in an
 * attribute-less <p>...</p> (optionally followed by a newline), or when
 * the wrapped content itself contains a closing </p>, i.e. the input
 * holds more than one paragraph.
 *
 * @param string $html
 * @return string
 */
public static function stripOuterParagraph( $html ) {
	$matches = [];
	if ( !preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $matches ) ) {
		// Not wrapped in a plain <p>...</p>
		return $html;
	}

	$inner = $matches[1];
	if ( strpos( $inner, '</p>' ) !== false ) {
		// More than one paragraph: leave the input alone
		return $html;
	}

	return $inner;
}
6402
/**
 * Add HTML tags marking the parts of a page title, to be displayed in
 * the first heading of the page.
 *
 * Every part is converted with HtmlArmor::getHtml() (presumably escaping
 * plain strings and passing HtmlArmor objects through; confirm against
 * HtmlArmor). The namespace and separator spans are only emitted when
 * $nsText is not the empty string.
 *
 * @param string|HtmlArmor $nsText Namespace part
 * @param string|HtmlArmor $nsSeparator Separator between the parts
 * @param string|HtmlArmor $mainText Main part of the title
 * @return string HTML
 */
public static function formatPageTitle( $nsText, $nsSeparator, $mainText ): string {
	$spans = [];
	if ( $nsText !== '' ) {
		$spans[] = '<span class="mw-page-title-namespace">' . HtmlArmor::getHtml( $nsText ) . '</span>';
		$spans[] = '<span class="mw-page-title-separator">' . HtmlArmor::getHtml( $nsSeparator ) . '</span>';
	}
	$spans[] = '<span class="mw-page-title-main">' . HtmlArmor::getHtml( $mainText ) . '</span>';

	return implode( '', $spans );
}
6422
/**
 * Strip everything but the <body> from the provided string.
 *
 * Removes everything up to and including the first opening <body ...>
 * tag, and a trailing "</body></html>" (whitespace allowed between and
 * after the two tags). Text without those tags is returned unchanged.
 *
 * @param string $text HTML document
 * @return string The contents of the body element
 */
public static function extractBody( string $text ): string {
	$withoutHead = preg_replace( '!^.*?<body[^>]*>!s', '', $text, 1 );
	return preg_replace( '!</body>\s*</html>\s*$!', '', $withoutHead, 1 );
}
6434
/**
 * Return this parser if it is not doing anything, otherwise get a fresh
 * parser from the factory.
 *
 * Emits a deprecation warning (deprecated since 1.39) on every call.
 *
 * @return Parser $this when not locked, otherwise the result of
 *   $this->factory->create()
 */
public function getFreshParser() {
	wfDeprecated( __METHOD__, '1.39' );
	return $this->mInParse ? $this->factory->create() : $this;
}
6454
/**
 * Sets up the PHP implementation of OOUI for use in this request and
 * flags the current ParserOutput as needing OOUI.
 *
 * Emits a deprecation warning (deprecated since 1.35) on every call.
 */
public function enableOOUI() {
	wfDeprecated( __METHOD__, '1.35' );
	OutputPage::setupOOUI();
	$parserOutput = $this->mOutput;
	$parserOutput->setEnableOOUI( true );
}
6467
/**
 * Set an output flag on the current ParserOutput and write a debug log
 * entry naming the flag, the page being parsed and the reason.
 *
 * @param string $flag Flag passed to ParserOutput::setOutputFlag()
 * @param string $reason Explanation, used only in the log message
 */
private function setOutputFlag( string $flag, string $reason ): void {
	$this->mOutput->setOutputFlag( $flag );

	$name = $this->getTitle()->getPrefixedText();
	$logLine = __METHOD__ . ": set $flag flag on '$name'; $reason";
	$this->logger->debug( $logLine );
}
6479}
6480
6485class_alias( Parser::class, 'Parser' );
getUser()
const OT_WIKI
Definition Defines.php:158
const NS_FILE
Definition Defines.php:70
const NS_MEDIAWIKI
Definition Defines.php:72
const NS_TEMPLATE
Definition Defines.php:74
const NS_SPECIAL
Definition Defines.php:53
const OT_PLAIN
Definition Defines.php:160
const OT_PREPROCESS
Definition Defines.php:159
const OT_HTML
Definition Defines.php:157
const NS_MEDIA
Definition Defines.php:52
const NS_CATEGORY
Definition Defines.php:78
wfEscapeWikiText( $input)
Escapes the given text so that it may be output using addWikiText() without any linking,...
wfDeprecatedMsg( $msg, $version=false, $component=false, $callerOffset=2)
Log a deprecation warning with arbitrary message text.
wfHostname()
Get host name of the current machine, for use in error reporting.
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest. If source is NULL, it just returns the val...
wfMatchesDomainList( $url, $domains)
Check whether a given URL has a domain that occurs in a given set of domains.
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
Title null $mTitle
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:81
Expansions of core magic variables, used by the parser.
Various core parser functions, registered in every Parser.
Various tag hooks, registered in every Parser.
static register(Parser $parser, ServiceOptions $options)
const REGISTER_OPTIONS
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition File.php:73
Marks HTML that shouldn't be escaped.
Definition HtmlArmor.php:30
Class for exceptions thrown by ImageGalleryBase::factory().
Methods for dealing with language codes.
Base class for language-specific code.
Definition Language.php:63
formatNum( $number)
Normally we output all numbers in plain en_US style, that is 293,291.235 for two hundred ninety-three...
Store key-value entries in a size-limited in-memory LRU cache.
Base media handler class.
Helper class for mapping value objects representing basic entities to cache keys.
This class performs some operations related to tracking categories, such as adding a tracking categor...
A class for passing options to services.
assertRequiredOptions(array $expectedKeys)
Assert that the list of options provided in this instance exactly match $expectedKeys,...
Group all the pieces relevant to the context of a request into one instance.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
This class is a collection of static functions that serve two purposes:
Definition Html.php:56
Factory creating MWHttpRequest objects.
Variant of the Message class.
An interface for creating language converters.
isConversionDisabled()
Whether to disable language variant conversion.
A service that provides utilities to do with language names and codes.
Factory to create LinkRender objects.
Class that generates HTML for internal links.
Some internal bits split of from Skin.php.
Definition Linker.php:65
A class containing constants representing the names of configuration variables.
const EnableParserLimitReporting
Name constant for the EnableParserLimitReporting setting, for use with Config::get()
const MaxSigChars
Name constant for the MaxSigChars setting, for use with Config::get()
const ServerName
Name constant for the ServerName setting, for use with Config::get()
const AllowSlowParserFunctions
Name constant for the AllowSlowParserFunctions setting, for use with Config::get()
const AllowDisplayTitle
Name constant for the AllowDisplayTitle setting, for use with Config::get()
const StylePath
Name constant for the StylePath setting, for use with Config::get()
const MaxTocLevel
Name constant for the MaxTocLevel setting, for use with Config::get()
const Localtimezone
Name constant for the Localtimezone setting, for use with Config::get()
const Server
Name constant for the Server setting, for use with Config::get()
const FragmentMode
Name constant for the FragmentMode setting, for use with Config::get()
const EnableScaryTranscluding
Name constant for the EnableScaryTranscluding setting, for use with Config::get()
const TranscludeCacheExpiry
Name constant for the TranscludeCacheExpiry setting, for use with Config::get()
const Sitename
Name constant for the Sitename setting, for use with Config::get()
const ArticlePath
Name constant for the ArticlePath setting, for use with Config::get()
const ScriptPath
Name constant for the ScriptPath setting, for use with Config::get()
const ParserEnableLegacyMediaDOM
Name constant for the ParserEnableLegacyMediaDOM setting, for use with Config::get()
const SignatureValidation
Name constant for the SignatureValidation setting, for use with Config::get()
const MiserMode
Name constant for the MiserMode setting, for use with Config::get()
const RawHtml
Name constant for the RawHtml setting, for use with Config::get()
const PreprocessorCacheThreshold
Name constant for the PreprocessorCacheThreshold setting, for use with Config::get()
const ExtraInterlanguageLinkPrefixes
Name constant for the ExtraInterlanguageLinkPrefixes setting, for use with Config::get()
const ShowHostnames
Name constant for the ShowHostnames setting, for use with Config::get()
Service locator for MediaWiki core services.
The Message class deals with fetching and processing of interface message into a variety of formats.
Definition Message.php:157
This is one of the Core classes and should be read at least once by any new developers.
Class for handling an array of magic words.
Store information about magic words, and create/cache MagicWord objects.
Rendered output of a wiki page, as parsed from wikitext.
PHP Parser - Processes wiki markup (which uses a more user-friendly syntax, such as "[[link]]" for ma...
Definition Parser.php:155
guessLegacySectionNameFromWikiText( $text)
Same as guessSectionNameFromWikiText(), but produces legacy anchors instead, if possible.
Definition Parser.php:6191
$mExpensiveFunctionCount
Number of expensive parser function calls.
Definition Parser.php:295
__construct(ServiceOptions $svcOptions, MagicWordFactory $magicWordFactory, Language $contLang, ParserFactory $factory, UrlUtils $urlUtils, SpecialPageFactory $spFactory, LinkRendererFactory $linkRendererFactory, NamespaceInfo $nsInfo, LoggerInterface $logger, BadFileLookup $badFileLookup, LanguageConverterFactory $languageConverterFactory, HookContainer $hookContainer, TidyDriverBase $tidy, WANObjectCache $wanCache, UserOptionsLookup $userOptionsLookup, UserFactory $userFactory, TitleFormatter $titleFormatter, HttpRequestFactory $httpRequestFactory, TrackingCategories $trackingCategories, SignatureValidatorFactory $signatureValidatorFactory, UserNameUtils $userNameUtils)
Constructing parsers directly is not allowed! Use a ParserFactory.
Definition Parser.php:442
getTargetLanguageConverter()
Shorthand for getting a Language Converter for Target language.
Definition Parser.php:1631
setOutputType( $ot)
Mutator for the output type.
Definition Parser.php:1063
getBadFileLookup()
Get the BadFileLookup instance that this Parser is using.
Definition Parser.php:1232
stripSectionName( $text)
Strips a text string of wikitext for use in a section anchor.
Definition Parser.php:6244
getDefaultSort()
Accessor for the 'defaultsort' page property.
Definition Parser.php:6137
limitationWarn( $limitationType, $current='', $max='')
Warn the user when a parser limitation is reached. Will warn the user at most once per limitation type...
Definition Parser.php:2968
makeImage(LinkTarget $link, $options, $holders=false)
Parse image options text and use it to make an image.
Definition Parser.php:5319
const OT_PLAIN
Output type: like Parser::extractSections() - portions of the original are returned unchanged.
Definition Parser.php:201
static guessSectionNameFromStrippedText( $text)
Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
Definition Parser.php:6204
static statelessFetchTemplate( $page, $parser=false)
Static function to get a template. Can be overridden via ParserOptions::setTemplateCallback().
Definition Parser.php:3619
markerSkipCallback( $s, callable $callback)
Call a callback function on all regions of the given text that are not inside strip markers,...
Definition Parser.php:6282
getPreloadText( $text, PageReference $page, ParserOptions $options, $params=[])
Process the wikitext for the "?preload=" feature.
Definition Parser.php:961
getLinkRenderer()
Get a LinkRenderer instance to make links with.
Definition Parser.php:1197
parse( $text, PageReference $page, ParserOptions $options, $linestart=true, $clearState=true, $revid=null)
Convert wikitext to HTML. Do not call this function recursively.
Definition Parser.php:659
tagNeedsNowikiStrippedInTagPF(string $lowerTagName)
Definition Parser.php:3925
doBlockLevels( $text, $linestart)
Make lists from lines starting with ':', '*', '#', etc.
Definition Parser.php:2812
getMagicWordFactory()
Get the MagicWordFactory that this Parser is using.
Definition Parser.php:1212
lock()
Lock the current instance of the parser.
Definition Parser.php:6355
setFunctionHook( $id, callable $callback, $flags=0)
Create a function, e.g.
Definition Parser.php:4991
const EXT_LINK_URL_CLASS
Everything except bracket, space, or control characters.
Definition Parser.php:169
msg(string $msg,... $args)
Helper function to correctly set the target language and title of a message based on the parser conte...
Definition Parser.php:4143
preprocess( $text, ?PageReference $page, ParserOptions $options, $revid=null, $frame=false)
Expand templates and variables in the text, producing valid, static wikitext.
Definition Parser.php:914
firstCallInit()
Used to do various kinds of initialisation on the first call of the parser.
Definition Parser.php:578
guessSectionNameFromWikiText( $text)
Try to guess the section anchor name based on a wikitext fragment presumably extracted from a heading...
Definition Parser.php:6174
getUserSig(UserIdentity $user, $nickname=false, $fancySig=null)
Fetch the user's signature text, if any, and normalize to validated, ready-to-insert wikitext.
Definition Parser.php:4693
setTitle(Title $t=null)
Set the context title.
Definition Parser.php:994
interwikiTransclude(LinkTarget $link, $action)
Transclude an interwiki link.
Definition Parser.php:3813
makeLimitReport()
Set the limit report data in the current ParserOutput.
Definition Parser.php:758
validateSig( $text)
Check that the user's signature contains no bad XML.
Definition Parser.php:4762
isCurrentRevisionOfTitleCached(LinkTarget $link)
Definition Parser.php:3538
getRevisionId()
Get the ID of the revision we are parsing.
Definition Parser.php:5991
renderImageGallery( $text, array $params)
Renders an image gallery from a text with one line per image.
Definition Parser.php:5075
argSubstitution(array $piece, PPFrame $frame)
Triple brace replacement – used for template arguments.
Definition Parser.php:3884
replaceSection( $oldText, $sectionId, $newText)
This function returns $oldtext after the content of the section specified by $section has been replac...
Definition Parser.php:5904
transformMsg( $text, ParserOptions $options, ?PageReference $page=null)
Wrapper for preprocess()
Definition Parser.php:4891
insertStripItem( $text)
Add an item to the strip state. Returns the unique tag which must be inserted into the stripped text T...
Definition Parser.php:1339
internalParse( $text, $isMain=true, $frame=false)
Helper function for parse() that transforms wiki markup into half-parsed HTML.
Definition Parser.php:1562
static normalizeLinkUrl( $url)
Replace unusual escape codes in a URL with their equivalent characters.
Definition Parser.php:2292
replaceLinkHolders(&$text)
Replace "<!--LINK-->" link placeholders with actual links, in the buffer. Placeholders created in Link...
Definition Parser.php:5036
static extractTagsAndParams(array $elements, $text, &$matches)
Replaces all occurrences of HTML-style comments and the given tags in the text with a random marker a...
Definition Parser.php:1255
static statelessFetchRevisionRecord(LinkTarget $link, $parser=null)
Wrapper around RevisionLookup::getKnownCurrentRevision.
Definition Parser.php:3554
getHookRunner()
Get a HookRunner for calling core hooks.
Definition Parser.php:1667
getContentLanguage()
Get the content language that this Parser is using.
Definition Parser.php:1222
$ot
Shortcut alias, see Parser::setOutputType()
Definition Parser.php:322
getExternalLinkAttribs( $url)
Get an associative array of additional HTML attributes appropriate for a particular external link.
Definition Parser.php:2259
setPage(?PageReference $t=null)
Set the page used as context for parsing, e.g.
Definition Parser.php:1013
setOptions(ParserOptions $options)
Mutator for the ParserOptions object.
Definition Parser.php:1107
preSaveTransform( $text, PageReference $page, UserIdentity $user, ParserOptions $options, $clearState=true)
Transform wiki markup when saving a page by doing "\\r\\n" -> "\\n" conversion, substituting signatur...
Definition Parser.php:4572
killMarkers( $text)
Remove any strip markers found in the given text.
Definition Parser.php:6313
const OT_PREPROCESS
Output type: like Parser::preprocess()
Definition Parser.php:196
cleanSig( $text, $parsing=false)
Clean up signature text.
Definition Parser.php:4777
isLocked()
Will entry points such as parse() throw an exception due to the parser already being active?
Definition Parser.php:6380
getTemplateDom(LinkTarget $title)
Get the semi-parsed DOM representation of a template with a given title, and its redirect destination...
Definition Parser.php:3463
getRevisionUser()
Get the name of the user that edited the last revision.
Definition Parser.php:6084
getFlatSectionInfo( $text)
Get an array of preprocessor section information.
Definition Parser.php:5937
getTargetLanguage()
Get the target language for the content being parsed.
Definition Parser.php:1158
clearState()
Clear Parser state.
Definition Parser.php:590
getFunctionHooks()
Get all registered function hook identifiers.
Definition Parser.php:5025
getFreshParser()
Return this parser if it is not doing anything, otherwise get a fresh parser.
Definition Parser.php:6446
enableOOUI()
Sets up the PHP implementation of OOUI for use in this request and instructs OutputPage to enable OO...
Definition Parser.php:6462
braceSubstitution(array $piece, PPFrame $frame)
Return the text of a template, after recursively replacing any variables or templates within the temp...
Definition Parser.php:2993
getUserIdentity()
Get a user either from the user set on Parser if it's set, or from the ParserOptions object otherwise...
Definition Parser.php:1177
static formatPageTitle( $nsText, $nsSeparator, $mainText)
Add HTML tags marking the parts of a page title, to be displayed in the first heading of the page.
Definition Parser.php:6413
ParserOptions null $mOptions
Definition Parser.php:308
setUser(?UserIdentity $user)
Set the current user.
Definition Parser.php:983
Options( $x=null)
Accessor/mutator for the ParserOptions object.
Definition Parser.php:1118
getHookContainer()
Get a HookContainer capable of returning metadata about hooks or running extension hooks.
Definition Parser.php:1655
getOutputType()
Accessor for the output type.
Definition Parser.php:1054
recursivePreprocess( $text, $frame=false)
Recursive parser entry point that can be called from an extension tag hook.
Definition Parser.php:941
getRevisionSize()
Get the size of the revision.
Definition Parser.php:6108
getPreprocessor()
Get a preprocessor object.
Definition Parser.php:1187
getStripList()
Get a list of strippable XML-like elements.
Definition Parser.php:1318
extensionSubstitution(array $params, PPFrame $frame, bool $processNowiki=false)
Return the text to be used for a given extension tag.
Definition Parser.php:3949
setHook( $tag, callable $callback)
Create an HTML-style tag, e.g.
Definition Parser.php:4925
preprocessToDom( $text, $flags=0)
Get the document object model for the given wikitext.
Definition Parser.php:2892
getSection( $text, $sectionId, $defaultText='')
This function returns the text of a section, specified by a number ($section).
Definition Parser.php:5887
static parseWidthParam( $value, $parseHeight=true)
Parsed a width param of imagelink like 300px or 200x300px.
Definition Parser.php:6327
const OT_WIKI
Output type: like Parser::preSaveTransform()
Definition Parser.php:194
fetchTemplateAndTitle(LinkTarget $link)
Fetch the unparsed text of a template and register a reference to it.
Definition Parser.php:3577
static getExternalLinkRel( $url=false, LinkTarget $title=null)
Get the rel attribute for a particular external link.
Definition Parser.php:2234
static stripOuterParagraph( $html)
Strip outer <p></p> tag from the HTML source of a single paragraph.
Definition Parser.php:6394
getRevisionRecordObject()
Get the revision record object for $this->mRevisionId.
Definition Parser.php:6001
parseExtensionTagAsTopLevelDoc( $text)
Needed by Parsoid/PHP to ensure all the hooks for extensions are run in the right order.
Definition Parser.php:895
OutputType( $x=null)
Accessor/mutator for the output type.
Definition Parser.php:1081
clearTagHooks()
Remove all tag hooks.
Definition Parser.php:4943
modifyImageHtml(File $file, array $params, string &$html)
Give hooks a chance to modify image thumbnail HTML.
Definition Parser.php:5622
static extractBody(string $text)
Strip everything but the <body> from the provided string.
Definition Parser.php:6429
getRevisionTimestamp()
Get the timestamp associated with the current revision, adjusted for the default server-local timesta...
Definition Parser.php:6056
__clone()
Allow extensions to clean up when the parser is cloned.
Definition Parser.php:548
Title $mTitle
Title context, used for self-link rendering and similar things.
Definition Parser.php:315
static cleanSigInSig( $text)
Strip 3, 4 or 5 tildes out of signatures.
Definition Parser.php:4819
callParserFunction(PPFrame $frame, $function, array $args=[])
Call a parser function and return an array with text and flags.
Definition Parser.php:3373
attributeStripCallback(&$text, $frame=false)
Callback from the Sanitizer for expanding items found in HTML attribute values, so they can be safely...
Definition Parser.php:5693
__destruct()
Reduce memory usage to reduce the impact of circular references.
Definition Parser.php:533
recursiveTagParse( $text, $frame=false)
Half-parse wikitext to half-parsed HTML.
Definition Parser.php:846
replaceVariables( $text, $frame=false, $argsOnly=false)
Replace magic variables, templates, and template arguments with the appropriate text.
Definition Parser.php:2917
doQuotes( $text)
Helper function for handleAllQuotes()
Definition Parser.php:1969
static replaceTableOfContentsMarker( $text, $toc)
Replace table of contents marker in parsed HTML.
Definition Parser.php:4840
const OT_HTML
Output type: like Parser::parse()
Definition Parser.php:192
recursiveTagParseFully( $text, $frame=false)
Fully parse wikitext to fully parsed HTML.
Definition Parser.php:870
fetchFileNoRegister(LinkTarget $link, array $options=[])
Helper function for fetchFileAndTitle.
Definition Parser.php:3790
getPage()
Returns the page used as context for parsing, e.g.
Definition Parser.php:1036
fetchFileAndTitle(LinkTarget $link, array $options=[])
Fetch a file and its title and register a reference to it.
Definition Parser.php:3763
fetchCurrentRevisionRecordOfTitle(LinkTarget $link)
Fetch the current revision of a given title as a RevisionRecord.
Definition Parser.php:3508
startExternalParse(?PageReference $page, ParserOptions $options, $outputType, $clearState=true, $revId=null)
Set up some variables which are usually set up in parse() so that an external function can call some ...
Definition Parser.php:4856
getFunctionLang()
Get a language object for use in parser functions such as {{FORMATNUM:}}.
Definition Parser.php:1145
resetOutput()
Reset the ParserOutput.
Definition Parser.php:636
static removeSomeTags(string $text, array $options=[])
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments; the result will alw...
WebRequest clone which takes values from a provided array.
Exception representing a failure to look up a revision.
Page revision base class.
Value object representing a content slot associated with a page revision.
Factory for handling the special page list and generating SpecialPage objects.
Parent class for all special pages.
Base class for HTML cleanup utilities.
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
A codec for MediaWiki page titles.
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
Represents a title within MediaWiki.
Definition Title.php:78
Provides access to user options.
Creates User objects.
UserNameUtils service.
internal since 1.36
Definition User.php:93
Library for creating and parsing MW-style timestamps.
A service to expand, parse, and otherwise manipulate URLs.
Definition UrlUtils.php:16
validProtocols()
Returns a regular expression of recognized URL protocols.
Definition UrlUtils.php:354
static int $inParserFactory
Track calls to Parser constructor to aid in deprecation of direct Parser invocation.
Set options of the Parser.
getExpensiveParserFunctionLimit()
Maximum number of calls per parse to expensive parser functions.
getPreSaveTransform()
Transform wiki markup when saving the page?
getDisableTitleConversion()
Whether title conversion should be disabled.
Differences from DOM schema:
const DOM_FOR_INCLUSION
Transclusion mode flag for Preprocessor::preprocessToObj()
Arbitrary section name based PHP profiling.
A collection of static methods to play with strings.
static replaceMarkup( $search, $replace, $text)
More or less "markup-safe" str_replace() Ignores any instances of the separator inside <....
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
Content object implementation for representing flat text.
Multi-datacenter aware caching interface.
Module of static functions for generating XML.
Definition Xml.php:33
return[0=> 'ـ', 1=> ' ', 2=> '`', 3=> '´', 4=> '˜', 5=> '^', 6=> '¯', 7=> '‾', 8=> '˘', 9=> '˙', 10=> '¨', 11=> '˚', 12=> '˝', 13=> '᾽', 14=> '῝', 15=> '¸', 16=> '˛', 17=> '_', 18=> '‗', 19=> '῀', 20=> '﮲', 21=> '﮳', 22=> '﮴', 23=> '﮵', 24=> '﮶', 25=> '﮷', 26=> '﮸', 27=> '﮹', 28=> '﮺', 29=> '﮻', 30=> '﮼', 31=> '﮽', 32=> '﮾', 33=> '﮿', 34=> '﯀', 35=> '﯁', 36=> '゛', 37=> '゜', 38=> '-', 39=> '֊', 40=> '᐀', 41=> '᭠', 42=> '᠆', 43=> '᠇', 44=> '‐', 45=> '‒', 46=> '–', 47=> '—', 48=> '―', 49=> '⁓', 50=> '⸗', 51=> '゠', 52=> '・', 53=> ',', 54=> '՝', 55=> '،', 56=> '؍', 57=> '٫', 58=> '٬', 59=> '߸', 60=> '᠂', 61=> '᠈', 62=> '꓾', 63=> '꘍', 64=> '꛵', 65=> '︑', 66=> ';', 67=> '؛', 68=> '⁏', 69=> '꛶', 70=> ':', 71=> '։', 72=> '؞', 73=> '܃', 74=> '܄', 75=> '܅', 76=> '܆', 77=> '܇', 78=> '܈', 79=> '࠰', 80=> '࠱', 81=> '࠲', 82=> '࠳', 83=> '࠴', 84=> '࠵', 85=> '࠶', 86=> '࠷', 87=> '࠸', 88=> '࠹', 89=> '࠺', 90=> '࠻', 91=> '࠼', 92=> '࠽', 93=> '࠾', 94=> '፡', 95=> '፣', 96=> '፤', 97=> '፥', 98=> '፦', 99=> '᠄', 100=> '᠅', 101=> '༔', 102=> '៖', 103=> '᭝', 104=> '꧇', 105=> '᛫', 106=> '᛬', 107=> '᛭', 108=> '꛴', 109=> '!', 110=> '¡', 111=> '՜', 112=> '߹', 113=> '᥄', 114=> '?', 115=> '¿', 116=> '⸮', 117=> '՞', 118=> '؟', 119=> '܉', 120=> '፧', 121=> '᥅', 122=> '⳺', 123=> '⳻', 124=> '꘏', 125=> '꛷', 126=> '‽', 127=> '⸘', 128=> '.', 129=> '᠁', 130=> '۔', 131=> '܁', 132=> '܂', 133=> '።', 134=> '᠃', 135=> '᠉', 136=> '᙮', 137=> '᭜', 138=> '⳹', 139=> '⳾', 140=> '⸰', 141=> '꓿', 142=> '꘎', 143=> '꛳', 144=> '︒', 145=> '·', 146=> '⸱', 147=> '।', 148=> '॥', 149=> '꣎', 150=> '꣏', 151=> '᰻', 152=> '᰼', 153=> '꡶', 154=> '꡷', 155=> '᜵', 156=> '᜶', 157=> '꤯', 158=> '၊', 159=> '။', 160=> '។', 161=> '៕', 162=> '᪨', 163=> '᪩', 164=> '᪪', 165=> '᪫', 166=> '᭞', 167=> '᭟', 168=> '꧈', 169=> '꧉', 170=> '꩝', 171=> '꩞', 172=> '꩟', 173=> '꯫', 174=> '𐩖', 175=> '𐩗', 176=> '𑁇', 177=> '𑁈', 178=> '𑃀', 179=> '𑃁', 180=> '᱾', 181=> '᱿', 182=> '܀', 183=> '߷', 184=> '჻', 185=> '፠', 186=> '፨', 187=> '᨞', 188=> '᨟', 189=> '᭚', 190=> '᭛', 
191=> '꧁', 192=> '꧂', 193=> '꧃', 194=> '꧄', 195=> '꧅', 196=> '꧆', 197=> '꧊', 198=> '꧋', 199=> '꧌', 200=> '꧍', 201=> '꛲', 202=> '꥟', 203=> '𐡗', 204=> '𐬺', 205=> '𐬻', 206=> '𐬼', 207=> '𐬽', 208=> '𐬾', 209=> '𐬿', 210=> '𑂾', 211=> '𑂿', 212=> '⁕', 213=> '⁖', 214=> '⁘', 215=> '⁙', 216=> '⁚', 217=> '⁛', 218=> '⁜', 219=> '⁝', 220=> '⁞', 221=> '⸪', 222=> '⸫', 223=> '⸬', 224=> '⸭', 225=> '⳼', 226=> '⳿', 227=> '⸙', 228=> '𐤿', 229=> '𐄀', 230=> '𐄁', 231=> '𐄂', 232=> '𐎟', 233=> '𐏐', 234=> '𐤟', 235=> '𒑰', 236=> '𒑱', 237=> '𒑲', 238=> '𒑳', 239=> '\'', 240=> '‘', 241=> '’', 242=> '‚', 243=> '‛', 244=> '‹', 245=> '›', 246=> '"', 247 => '“', 248 => '”', 249 => '„', 250 => '‟', 251 => '«', 252 => '»', 253 => '(', 254 => ')', 255 => '[', 256 => ']', 257 => '{', 258 => '}', 259 => '༺', 260 => '༻', 261 => '༼', 262 => '༽', 263 => '᚛', 264 => '᚜', 265 => '⁅', 266 => '⁆', 267 => '⧼', 268 => '⧽', 269 => '⦃', 270 => '⦄', 271 => '⦅', 272 => '⦆', 273 => '⦇', 274 => '⦈', 275 => '⦉', 276 => '⦊', 277 => '⦋', 278 => '⦌', 279 => '⦍', 280 => '⦎', 281 => '⦏', 282 => '⦐', 283 => '⦑', 284 => '⦒', 285 => '⦓', 286 => '⦔', 287 => '⦕', 288 => '⦖', 289 => '⦗', 290 => '⦘', 291 => '⟬', 292 => '⟭', 293 => '⟮', 294 => '⟯', 295 => '⸂', 296 => '⸃', 297 => '⸄', 298 => '⸅', 299 => '⸉', 300 => '⸊', 301 => '⸌', 302 => '⸍', 303 => '⸜', 304 => '⸝', 305 => '⸠', 306 => '⸡', 307 => '⸢', 308 => '⸣', 309 => '⸤', 310 => '⸥', 311 => '⸦', 312 => '⸧', 313 => '⸨', 314 => '⸩', 315 => '〈', 316 => '〉', 317 => '「', 318 => '」', 319 => '﹝', 320 => '﹞', 321 => '︗', 322 => '︘', 323 => '﴾', 324 => '﴿', 325 => '§', 326 => '¶', 327 => '⁋', 328 => '©', 329 => '®', 330 => '@', 331 => '*', 332 => '⁎', 333 => '⁑', 334 => '٭', 335 => '꙳', 336 => '/', 337 => '⁄', 338 => '\\', 339 => '&', 340 => '⅋', 341 => '⁊', 342 => '#', 343 => '%', 344 => '٪', 345 => '‰', 346 => '؉', 347 => '‱', 348 => '؊', 349 => '⁒', 350 => '†', 351 => '‡', 352 => '•', 353 => '‣', 354 => '‧', 355 => '⁃', 356 => '⁌', 357 => '⁍', 358 => '′', 359 => '‵', 360 => '‸', 361 => '※', 
362 => '‿', 363 => '⁔', 364 => '⁀', 365 => '⁐', 366 => '⁁', 367 => '⁂', 368 => '⸀', 369 => '⸁', 370 => '⸆', 371 => '⸇', 372 => '⸈', 373 => '⸋', 374 => '⸎', 375 => '⸏', 376 => '⸐', 377 => '⸑', 378 => '⸒', 379 => '⸓', 380 => '⸔', 381 => '⸕', 382 => '⸖', 383 => '⸚', 384 => '⸛', 385 => '⸞', 386 => '⸟', 387 => '꙾', 388 => '՚', 389 => '՛', 390 => '՟', 391 => '־', 392 => '׀', 393 => '׃', 394 => '׆', 395 => '׳', 396 => '״', 397 => '܊', 398 => '܋', 399 => '܌', 400 => '܍', 401 => '࡞', 402 => '᠀', 403 => '॰', 404 => '꣸', 405 => '꣹', 406 => '꣺', 407 => '෴', 408 => '๚', 409 => '๛', 410 => '꫞', 411 => '꫟', 412 => '༄', 413 => '༅', 414 => '༆', 415 => '༇', 416 => '༈', 417 => '༉', 418 => '༊', 419 => '࿐', 420 => '࿑', 421 => '་', 422 => '།', 423 => '༎', 424 => '༏', 425 => '༐', 426 => '༑', 427 => '༒', 428 => '྅', 429 => '࿒', 430 => '࿓', 431 => '࿔', 432 => '࿙', 433 => '࿚', 434 => '᰽', 435 => '᰾', 436 => '᰿', 437 => '᥀', 438 => '၌', 439 => '၍', 440 => '၎', 441 => '၏', 442 => '႞', 443 => '႟', 444 => '꩷', 445 => '꩸', 446 => '꩹', 447 => 'ៗ', 448 => '៘', 449 => '៙', 450 => '៚', 451 => '᪠', 452 => '᪡', 453 => '᪢', 454 => '᪣', 455 => '᪤', 456 => '᪥', 457 => '᪦', 458 => '᪬', 459 => '᪭', 460 => '᙭', 461 => '⵰', 462 => '꡴', 463 => '꡵', 464 => '᯼', 465 => '᯽', 466 => '᯾', 467 => '᯿', 468 => '꤮', 469 => '꧞', 470 => '꧟', 471 => '꩜', 472 => '𑁉', 473 => '𑁊', 474 => '𑁋', 475 => '𑁌', 476 => '𑁍', 477 => '𐩐', 478 => '𐩑', 479 => '𐩒', 480 => '𐩓', 481 => '𐩔', 482 => '𐩕', 483 => '𐩘', 484 => '𐬹', 485 => '𑂻', 486 => '𑂼', 487 => 'ʹ', 488 => '͵', 489 => 'ʺ', 490 => '˂', 491 => '˃', 492 => '˄', 493 => '˅', 494 => 'ˆ', 495 => 'ˇ', 496 => 'ˈ', 497 => 'ˉ', 498 => 'ˊ', 499 => 'ˋ', 500 => 'ˌ', 501 => 'ˍ', 502 => 'ˎ', 503 => 'ˏ', 504 => '˒', 505 => '˓', 506 => '˔', 507 => '˕', 508 => '˖', 509 => '˗', 510 => '˞', 511 => '˟', 512 => '˥', 513 => '˦', 514 => '˧', 515 => '˨', 516 => '˩', 517 => '˪', 518 => '˫', 519 => 'ˬ', 520 => '˭', 521 => '˯', 522 => '˰', 523 => '˱', 524 => '˲', 525 => '˳', 526 => '˴', 527 => '˵', 528 => 
'˶', 529 => '˷', 530 => '˸', 531 => '˹', 532 => '˺', 533 => '˻', 534 => '˼', 535 => '˽', 536 => '˾', 537 => '˿', 538 => '᎐', 539 => '᎑', 540 => '᎒', 541 => '᎓', 542 => '᎔', 543 => '᎕', 544 => '᎖', 545 => '᎗', 546 => '᎘', 547 => '᎙', 548 => '꜀', 549 => '꜁', 550 => '꜂', 551 => '꜃', 552 => '꜄', 553 => '꜅', 554 => '꜆', 555 => '꜇', 556 => '꜈', 557 => '꜉', 558 => '꜊', 559 => '꜋', 560 => '꜌', 561 => '꜍', 562 => '꜎', 563 => '꜏', 564 => '꜐', 565 => '꜑', 566 => '꜒', 567 => '꜓', 568 => '꜔', 569 => '꜕', 570 => '꜖', 571 => 'ꜗ', 572 => 'ꜘ', 573 => 'ꜙ', 574 => 'ꜚ', 575 => 'ꜛ', 576 => 'ꜜ', 577 => 'ꜝ', 578 => 'ꜞ', 579 => 'ꜟ', 580 => '꜠', 581 => '꜡', 582 => 'ꞈ', 583 => '꞉', 584 => '꞊', 585 => '°', 586 => '҂', 587 => '؈', 588 => '؎', 589 => '؏', 590 => '۞', 591 => '۩', 592 => '﷽', 593 => '߶', 594 => '৺', 595 => '୰', 596 => '௳', 597 => '௴', 598 => '௵', 599 => '௶', 600 => '௷', 601 => '௸', 602 => '௺', 603 => '౿', 604 => '൹', 605 => '꠨', 606 => '꠩', 607 => '꠪', 608 => '꠫', 609 => '꠶', 610 => '꠷', 611 => '꠹', 612 => '๏', 613 => '༁', 614 => '༂', 615 => '༃', 616 => '༓', 617 => '༕', 618 => '༖', 619 => '༗', 620 => '༚', 621 => '༛', 622 => '༜', 623 => '༝', 624 => '༞', 625 => '༟', 626 => '༴', 627 => '༶', 628 => '༸', 629 => '྾', 630 => '྿', 631 => '࿀', 632 => '࿁', 633 => '࿂', 634 => '࿃', 635 => '࿄', 636 => '࿅', 637 => '࿇', 638 => '࿈', 639 => '࿉', 640 => '࿊', 641 => '࿋', 642 => '࿌', 643 => '࿎', 644 => '࿏', 645 => '࿕', 646 => '࿖', 647 => '࿗', 648 => '࿘', 649 => '᧠', 650 => '᧡', 651 => '᧢', 652 => '᧣', 653 => '᧤', 654 => '᧥', 655 => '᧦', 656 => '᧧', 657 => '᧨', 658 => '᧩', 659 => '᧪', 660 => '᧫', 661 => '᧬', 662 => '᧭', 663 => '᧮', 664 => '᧯', 665 => '᧰', 666 => '᧱', 667 => '᧲', 668 => '᧳', 669 => '᧴', 670 => '᧵', 671 => '᧶', 672 => '᧷', 673 => '᧸', 674 => '᧹', 675 => '᧺', 676 => '᧻', 677 => '᧼', 678 => '᧽', 679 => '᧾', 680 => '᧿', 681 => '᭡', 682 => '᭢', 683 => '᭣', 684 => '᭤', 685 => '᭥', 686 => '᭦', 687 => '᭧', 688 => '᭨', 689 => '᭩', 690 => '᭪', 691 => '᭴', 692 => '᭵', 693 => '᭶', 694 => '᭷', 
695 => '᭸', 696 => '᭹', 697 => '᭺', 698 => '᭻', 699 => '᭼', 700 => '℄', 701 => '℈', 702 => '℔', 703 => '℗', 704 => '℘', 705 => '℞', 706 => '℟', 707 => '℣', 708 => '℥', 709 => '℧', 710 => '℩', 711 => '℮', 712 => '℺', 713 => '⅁', 714 => '⅂', 715 => '⅃', 716 => '⅄', 717 => '⅊', 718 => '⅌', 719 => '⅍', 720 => '⅏', 721 => '←', 722 => '→', 723 => '↑', 724 => '↓', 725 => '↔', 726 => '↕', 727 => '↖', 728 => '↗', 729 => '↘', 730 => '↙', 731 => '↜', 732 => '↝', 733 => '↞', 734 => '↟', 735 => '↠', 736 => '↡', 737 => '↢', 738 => '↣', 739 => '↤', 740 => '↥', 741 => '↦', 742 => '↧', 743 => '↨', 744 => '↩', 745 => '↪', 746 => '↫', 747 => '↬', 748 => '↭', 749 => '↯', 750 => '↰', 751 => '↱', 752 => '↲', 753 => '↳', 754 => '↴', 755 => '↵', 756 => '↶', 757 => '↷', 758 => '↸', 759 => '↹', 760 => '↺', 761 => '↻', 762 => '↼', 763 => '↽', 764 => '↾', 765 => '↿', 766 => '⇀', 767 => '⇁', 768 => '⇂', 769 => '⇃', 770 => '⇄', 771 => '⇅', 772 => '⇆', 773 => '⇇', 774 => '⇈', 775 => '⇉', 776 => '⇊', 777 => '⇋', 778 => '⇌', 779 => '⇐', 780 => '⇑', 781 => '⇒', 782 => '⇓', 783 => '⇔', 784 => '⇕', 785 => '⇖', 786 => '⇗', 787 => '⇘', 788 => '⇙', 789 => '⇚', 790 => '⇛', 791 => '⇜', 792 => '⇝', 793 => '⇞', 794 => '⇟', 795 => '⇠', 796 => '⇡', 797 => '⇢', 798 => '⇣', 799 => '⇤', 800 => '⇥', 801 => '⇦', 802 => '⇧', 803 => '⇨', 804 => '⇩', 805 => '⇪', 806 => '⇫', 807 => '⇬', 808 => '⇭', 809 => '⇮', 810 => '⇯', 811 => '⇰', 812 => '⇱', 813 => '⇲', 814 => '⇳', 815 => '⇴', 816 => '⇵', 817 => '⇶', 818 => '⇷', 819 => '⇸', 820 => '⇹', 821 => '⇺', 822 => '⇻', 823 => '⇼', 824 => '⇽', 825 => '⇾', 826 => '⇿', 827 => '∀', 828 => '∁', 829 => '∂', 830 => '∃', 831 => '∅', 832 => '∆', 833 => '∇', 834 => '∈', 835 => '∊', 836 => '∋', 837 => '∍', 838 => '϶', 839 => '∎', 840 => '∏', 841 => '∐', 842 => '∑', 843 => '+', 844 => '±', 845 => '÷', 846 => '×', 847 => '<', 848 => '=', 849 => '>', 850 => '¬', 851 => '|', 852 => '¦', 853 => '‖', 854 => '~', 855 => '−', 856 => '∓', 857 => '∔', 858 => '∕', 859 => '∖', 860 => '∗', 861 => 
'∘', 862 => '∙', 863 => '√', 864 => '∛', 865 => '؆', 866 => '∜', 867 => '؇', 868 => '∝', 869 => '∞', 870 => '∟', 871 => '∠', 872 => '∡', 873 => '∢', 874 => '∣', 875 => '∥', 876 => '∧', 877 => '∨', 878 => '∩', 879 => '∪', 880 => '∫', 881 => '∮', 882 => '∱', 883 => '∲', 884 => '∳', 885 => '∴', 886 => '∵', 887 => '∶', 888 => '∷', 889 => '∸', 890 => '∹', 891 => '∺', 892 => '∻', 893 => '∼', 894 => '∽', 895 => '∾', 896 => '∿', 897 => '≀', 898 => '≂', 899 => '≃', 900 => '≅', 901 => '≆', 902 => '≈', 903 => '≊', 904 => '≋', 905 => '≌', 906 => '≍', 907 => '≎', 908 => '≏', 909 => '≐', 910 => '≑', 911 => '≒', 912 => '≓', 913 => '≔', 914 => '≕', 915 => '≖', 916 => '≗', 917 => '≘', 918 => '≙', 919 => '≚', 920 => '≛', 921 => '≜', 922 => '≝', 923 => '≞', 924 => '≟', 925 => '≡', 926 => '≣', 927 => '≤', 928 => '≥', 929 => '≦', 930 => '≧', 931 => '≨', 932 => '≩', 933 => '≪', 934 => '≫', 935 => '≬', 936 => '≲', 937 => '≳', 938 => '≶', 939 => '≷', 940 => '≺', 941 => '≻', 942 => '≼', 943 => '≽', 944 => '≾', 945 => '≿', 946 => '⊂', 947 => '⊃', 948 => '⊆', 949 => '⊇', 950 => '⊊', 951 => '⊋', 952 => '⊌', 953 => '⊍', 954 => '⊎', 955 => '⊏', 956 => '⊐', 957 => '⊑', 958 => '⊒', 959 => '⊓', 960 => '⊔', 961 => '⊕', 962 => '⊖', 963 => '⊗', 964 => '⊘', 965 => '⊙', 966 => '⊚', 967 => '⊛', 968 => '⊜', 969 => '⊝', 970 => '⊞', 971 => '⊟', 972 => '⊠', 973 => '⊡', 974 => '⊢', 975 => '⊣', 976 => '⊤', 977 => '⊥', 978 => '⊦', 979 => '⊧', 980 => '⊨', 981 => '⊩', 982 => '⊪', 983 => '⊫', 984 => '⊰', 985 => '⊱', 986 => '⊲', 987 => '⊳', 988 => '⊴', 989 => '⊵', 990 => '⊶', 991 => '⊷', 992 => '⊸', 993 => '⊹', 994 => '⊺', 995 => '⊻', 996 => '⊼', 997 => '⊽', 998 => '⊾', 999 => '⊿', 1000 => '⋀', 1001 => '⋁', 1002 => '⋂', 1003 => '⋃', 1004 => '⋄', 1005 => '⋅', 1006 => '⋆', 1007 => '⋇', 1008 => '⋈', 1009 => '⋉', 1010 => '⋊', 1011 => '⋋', 1012 => '⋌', 1013 => '⋍', 1014 => '⋎', 1015 => '⋏', 1016 => '⋐', 1017 => '⋑', 1018 => '⋒', 1019 => '⋓', 1020 => '⋔', 1021 => '⋕', 1022 => '⋖', 1023 => '⋗', 1024 => '⋘', 1025 => '⋙', 
1026 => '⋚', 1027 => '⋛', 1028 => '⋜', 1029 => '⋝', 1030 => '⋞', 1031 => '⋟', 1032 => '⋤', 1033 => '⋥', 1034 => '⋦', 1035 => '⋧', 1036 => '⋨', 1037 => '⋩', 1038 => '⋮', 1039 => '⋯', 1040 => '⋰', 1041 => '⋱', 1042 => '⋲', 1043 => '⋳', 1044 => '⋴', 1045 => '⋵', 1046 => '⋶', 1047 => '⋷', 1048 => '⋸', 1049 => '⋹', 1050 => '⋺', 1051 => '⋻', 1052 => '⋼', 1053 => '⋽', 1054 => '⋾', 1055 => '⋿', 1056 => '⌀', 1057 => '⌁', 1058 => '⌂', 1059 => '⌃', 1060 => '⌄', 1061 => '⌅', 1062 => '⌆', 1063 => '⌇', 1064 => '⌈', 1065 => '⌉', 1066 => '⌊', 1067 => '⌋', 1068 => '⌌', 1069 => '⌍', 1070 => '⌎', 1071 => '⌏', 1072 => '⌐', 1073 => '⌑', 1074 => '⌒', 1075 => '⌓', 1076 => '⌔', 1077 => '⌕', 1078 => '⌖', 1079 => '⌗', 1080 => '⌘', 1081 => '⌙', 1082 => '⌚', 1083 => '⌛', 1084 => '⌜', 1085 => '⌝', 1086 => '⌞', 1087 => '⌟', 1088 => '⌠', 1089 => '⌡', 1090 => '⌢', 1091 => '⌣', 1092 => '⌤', 1093 => '⌥', 1094 => '⌦', 1095 => '⌧', 1096 => '⌨', 1097 => '⌫', 1098 => '⌬', 1099 => '⌭', 1100 => '⌮', 1101 => '⌯', 1102 => '⌰', 1103 => '⌱', 1104 => '⌲', 1105 => '⌳', 1106 => '⌴', 1107 => '⌵', 1108 => '⌶', 1109 => '⌷', 1110 => '⌸', 1111 => '⌹', 1112 => '⌺', 1113 => '⌻', 1114 => '⌼', 1115 => '⌽', 1116 => '⌾', 1117 => '⌿', 1118 => '⍀', 1119 => '⍁', 1120 => '⍂', 1121 => '⍃', 1122 => '⍄', 1123 => '⍅', 1124 => '⍆', 1125 => '⍇', 1126 => '⍈', 1127 => '⍉', 1128 => '⍊', 1129 => '⍋', 1130 => '⍌', 1131 => '⍍', 1132 => '⍎', 1133 => '⍏', 1134 => '⍐', 1135 => '⍑', 1136 => '⍒', 1137 => '⍓', 1138 => '⍔', 1139 => '⍕', 1140 => '⍖', 1141 => '⍗', 1142 => '⍘', 1143 => '⍙', 1144 => '⍚', 1145 => '⍛', 1146 => '⍜', 1147 => '⍝', 1148 => '⍞', 1149 => '⍟', 1150 => '⍠', 1151 => '⍡', 1152 => '⍢', 1153 => '⍣', 1154 => '⍤', 1155 => '⍥', 1156 => '⍦', 1157 => '⍧', 1158 => '⍨', 1159 => '⍩', 1160 => '⍪', 1161 => '⍫', 1162 => '⍬', 1163 => '⍭', 1164 => '⍮', 1165 => '⍯', 1166 => '⍰', 1167 => '⍱', 1168 => '⍲', 1169 => '⍳', 1170 => '⍴', 1171 => '⍵', 1172 => '⍶', 1173 => '⍷', 1174 => '⍸', 1175 => '⍹', 1176 => '⍺', 1177 => '⍻', 1178 => '⍼', 1179 => 
'⍽', 1180 => '⍾', 1181 => '⍿', 1182 => '⎀', 1183 => '⎁', 1184 => '⎂', 1185 => '⎃', 1186 => '⎄', 1187 => '⎅', 1188 => '⎆', 1189 => '⎇', 1190 => '⎈', 1191 => '⎉', 1192 => '⎊', 1193 => '⎋', 1194 => '⎌', 1195 => '⎍', 1196 => '⎎', 1197 => '⎏', 1198 => '⎐', 1199 => '⎑', 1200 => '⎒', 1201 => '⎓', 1202 => '⎔', 1203 => '⎕', 1204 => '⎖', 1205 => '⎗', 1206 => '⎘', 1207 => '⎙', 1208 => '⎚', 1209 => '⎛', 1210 => '⎜', 1211 => '⎝', 1212 => '⎞', 1213 => '⎟', 1214 => '⎠', 1215 => '⎡', 1216 => '⎢', 1217 => '⎣', 1218 => '⎤', 1219 => '⎥', 1220 => '⎦', 1221 => '⎧', 1222 => '⎨', 1223 => '⎩', 1224 => '⎪', 1225 => '⎫', 1226 => '⎬', 1227 => '⎭', 1228 => '⎮', 1229 => '⎯', 1230 => '⎰', 1231 => '⎱', 1232 => '⎲', 1233 => '⎳', 1234 => '⎴', 1235 => '⎵', 1236 => '⎶', 1237 => '⎷', 1238 => '⎸', 1239 => '⎹', 1240 => '⎺', 1241 => '⎻', 1242 => '⎼', 1243 => '⎽', 1244 => '⎾', 1245 => '⎿', 1246 => '⏀', 1247 => '⏁', 1248 => '⏂', 1249 => '⏃', 1250 => '⏄', 1251 => '⏅', 1252 => '⏆', 1253 => '⏇', 1254 => '⏈', 1255 => '⏉', 1256 => '⏊', 1257 => '⏋', 1258 => '⏌', 1259 => '⏍', 1260 => '⏎', 1261 => '⏏', 1262 => '⏐', 1263 => '⏑', 1264 => '⏒', 1265 => '⏓', 1266 => '⏔', 1267 => '⏕', 1268 => '⏖', 1269 => '⏗', 1270 => '⏘', 1271 => '⏙', 1272 => '⏚', 1273 => '⏛', 1274 => '⏜', 1275 => '⏝', 1276 => '⏞', 1277 => '⏟', 1278 => '⏠', 1279 => '⏡', 1280 => '⏢', 1281 => '⏣', 1282 => '⏤', 1283 => '⏥', 1284 => '⏦', 1285 => '⏧', 1286 => '⏨', 1287 => '⏩', 1288 => '⏪', 1289 => '⏫', 1290 => '⏬', 1291 => '⏭', 1292 => '⏮', 1293 => '⏯', 1294 => '⏰', 1295 => '⏱', 1296 => '⏲', 1297 => '⏳', 1298 => '␀', 1299 => '␁', 1300 => '␂', 1301 => '␃', 1302 => '␄', 1303 => '␅', 1304 => '␆', 1305 => '␇', 1306 => '␈', 1307 => '␉', 1308 => '␊', 1309 => '␋', 1310 => '␌', 1311 => '␍', 1312 => '␎', 1313 => '␏', 1314 => '␐', 1315 => '␑', 1316 => '␒', 1317 => '␓', 1318 => '␔', 1319 => '␕', 1320 => '␖', 1321 => '␗', 1322 => '␘', 1323 => '␙', 1324 => '␚', 1325 => '␛', 1326 => '␜', 1327 => '␝', 1328 => '␞', 1329 => '␟', 1330 => '␠', 1331 => '␡', 1332 => '␢', 1333 
=> '␣', 1334 => '␤', 1335 => '␥', 1336 => '␦', 1337 => '⑀', 1338 => '⑁', 1339 => '⑂', 1340 => '⑃', 1341 => '⑄', 1342 => '⑅', 1343 => '⑆', 1344 => '⑇', 1345 => '⑈', 1346 => '⑉', 1347 => '⑊', 1348 => '─', 1349 => '━', 1350 => '│', 1351 => '┃', 1352 => '┄', 1353 => '┅', 1354 => '┆', 1355 => '┇', 1356 => '┈', 1357 => '┉', 1358 => '┊', 1359 => '┋', 1360 => '┌', 1361 => '┍', 1362 => '┎', 1363 => '┏', 1364 => '┐', 1365 => '┑', 1366 => '┒', 1367 => '┓', 1368 => '└', 1369 => '┕', 1370 => '┖', 1371 => '┗', 1372 => '┘', 1373 => '┙', 1374 => '┚', 1375 => '┛', 1376 => '├', 1377 => '┝', 1378 => '┞', 1379 => '┟', 1380 => '┠', 1381 => '┡', 1382 => '┢', 1383 => '┣', 1384 => '┤', 1385 => '┥', 1386 => '┦', 1387 => '┧', 1388 => '┨', 1389 => '┩', 1390 => '┪', 1391 => '┫', 1392 => '┬', 1393 => '┭', 1394 => '┮', 1395 => '┯', 1396 => '┰', 1397 => '┱', 1398 => '┲', 1399 => '┳', 1400 => '┴', 1401 => '┵', 1402 => '┶', 1403 => '┷', 1404 => '┸', 1405 => '┹', 1406 => '┺', 1407 => '┻', 1408 => '┼', 1409 => '┽', 1410 => '┾', 1411 => '┿', 1412 => '╀', 1413 => '╁', 1414 => '╂', 1415 => '╃', 1416 => '╄', 1417 => '╅', 1418 => '╆', 1419 => '╇', 1420 => '╈', 1421 => '╉', 1422 => '╊', 1423 => '╋', 1424 => '╌', 1425 => '╍', 1426 => '╎', 1427 => '╏', 1428 => '═', 1429 => '║', 1430 => '╒', 1431 => '╓', 1432 => '╔', 1433 => '╕', 1434 => '╖', 1435 => '╗', 1436 => '╘', 1437 => '╙', 1438 => '╚', 1439 => '╛', 1440 => '╜', 1441 => '╝', 1442 => '╞', 1443 => '╟', 1444 => '╠', 1445 => '╡', 1446 => '╢', 1447 => '╣', 1448 => '╤', 1449 => '╥', 1450 => '╦', 1451 => '╧', 1452 => '╨', 1453 => '╩', 1454 => '╪', 1455 => '╫', 1456 => '╬', 1457 => '╭', 1458 => '╮', 1459 => '╯', 1460 => '╰', 1461 => '╱', 1462 => '╲', 1463 => '╳', 1464 => '╴', 1465 => '╵', 1466 => '╶', 1467 => '╷', 1468 => '╸', 1469 => '╹', 1470 => '╺', 1471 => '╻', 1472 => '╼', 1473 => '╽', 1474 => '╾', 1475 => '╿', 1476 => '▀', 1477 => '▁', 1478 => '▂', 1479 => '▃', 1480 => '▄', 1481 => '▅', 1482 => '▆', 1483 => '▇', 1484 => '█', 1485 => '▉', 1486 => '▊', 
1487 => '▋', 1488 => '▌', 1489 => '▍', 1490 => '▎', 1491 => '▏', 1492 => '▐', 1493 => '░', 1494 => '▒', 1495 => '▓', 1496 => '▔', 1497 => '▕', 1498 => '▖', 1499 => '▗', 1500 => '▘', 1501 => '▙', 1502 => '▚', 1503 => '▛', 1504 => '▜', 1505 => '▝', 1506 => '▞', 1507 => '▟', 1508 => '■', 1509 => '□', 1510 => '▢', 1511 => '▣', 1512 => '▤', 1513 => '▥', 1514 => '▦', 1515 => '▧', 1516 => '▨', 1517 => '▩', 1518 => '▪', 1519 => '▫', 1520 => '▬', 1521 => '▭', 1522 => '▮', 1523 => '▯', 1524 => '▰', 1525 => '▱', 1526 => '▲', 1527 => '△', 1528 => '▴', 1529 => '▵', 1530 => '▶', 1531 => '▷', 1532 => '▸', 1533 => '▹', 1534 => '►', 1535 => '▻', 1536 => '▼', 1537 => '▽', 1538 => '▾', 1539 => '▿', 1540 => '◀', 1541 => '◁', 1542 => '◂', 1543 => '◃', 1544 => '◄', 1545 => '◅', 1546 => '◆', 1547 => '◇', 1548 => '◈', 1549 => '◉', 1550 => '◊', 1551 => '○', 1552 => '◌', 1553 => '◍', 1554 => '◎', 1555 => '●', 1556 => '◐', 1557 => '◑', 1558 => '◒', 1559 => '◓', 1560 => '◔', 1561 => '◕', 1562 => '◖', 1563 => '◗', 1564 => '◘', 1565 => '◙', 1566 => '◚', 1567 => '◛', 1568 => '◜', 1569 => '◝', 1570 => '◞', 1571 => '◟', 1572 => '◠', 1573 => '◡', 1574 => '◢', 1575 => '◣', 1576 => '◤', 1577 => '◥', 1578 => '◦', 1579 => '◧', 1580 => '◨', 1581 => '◩', 1582 => '◪', 1583 => '◫', 1584 => '◬', 1585 => '◭', 1586 => '◮', 1587 => '◯', 1588 => '◰', 1589 => '◱', 1590 => '◲', 1591 => '◳', 1592 => '◴', 1593 => '◵', 1594 => '◶', 1595 => '◷', 1596 => '◸', 1597 => '◹', 1598 => '◺', 1599 => '◻', 1600 => '◼', 1601 => '◽', 1602 => '◾', 1603 => '◿', 1604 => '☀', 1605 => '☁', 1606 => '☂', 1607 => '☃', 1608 => '☄', 1609 => '★', 1610 => '☆', 1611 => '☇', 1612 => '☈', 1613 => '☉', 1614 => '☊', 1615 => '☋', 1616 => '☌', 1617 => '☍', 1618 => '☎', 1619 => '☏', 1620 => '☐', 1621 => '☑', 1622 => '☒', 1623 => '☓', 1624 => '☔', 1625 => '☕', 1626 => '☖', 1627 => '☗', 1628 => '☘', 1629 => '☙', 1630 => '☚', 1631 => '☛', 1632 => '☜', 1633 => '☝', 1634 => '☞', 1635 => '☟', 1636 => '☠', 1637 => '☡', 1638 => '☢', 1639 => '☣', 1640 => 
'☤', 1641 => '☥', 1642 => '☦', 1643 => '☧', 1644 => '☨', 1645 => '☩', 1646 => '☪', 1647 => '☫', 1648 => '☬', 1649 => '☭', 1650 => '☮', 1651 => '☯', 1652 => '☸', 1653 => '☹', 1654 => '☺', 1655 => '☻', 1656 => '☼', 1657 => '☽', 1658 => '☾', 1659 => '☿', 1660 => '♀', 1661 => '♁', 1662 => '♂', 1663 => '♃', 1664 => '♄', 1665 => '♅', 1666 => '♆', 1667 => '♇', 1668 => '♈', 1669 => '♉', 1670 => '♊', 1671 => '♋', 1672 => '♌', 1673 => '♍', 1674 => '♎', 1675 => '♏', 1676 => '♐', 1677 => '♑', 1678 => '♒', 1679 => '♓', 1680 => '♔', 1681 => '♕', 1682 => '♖', 1683 => '♗', 1684 => '♘', 1685 => '♙', 1686 => '♚', 1687 => '♛', 1688 => '♜', 1689 => '♝', 1690 => '♞', 1691 => '♟', 1692 => '♠', 1693 => '♡', 1694 => '♢', 1695 => '♣', 1696 => '♤', 1697 => '♥', 1698 => '♦', 1699 => '♧', 1700 => '♨', 1701 => '♩', 1702 => '♪', 1703 => '♫', 1704 => '♬', 1705 => '♰', 1706 => '♱', 1707 => '♲', 1708 => '♳', 1709 => '♴', 1710 => '♵', 1711 => '♶', 1712 => '♷', 1713 => '♸', 1714 => '♹', 1715 => '♺', 1716 => '♻', 1717 => '♼', 1718 => '♽', 1719 => '♾', 1720 => '♿', 1721 => '⚀', 1722 => '⚁', 1723 => '⚂', 1724 => '⚃', 1725 => '⚄', 1726 => '⚅', 1727 => '⚆', 1728 => '⚇', 1729 => '⚈', 1730 => '⚉', 1731 => '⚐', 1732 => '⚑', 1733 => '⚒', 1734 => '⚓', 1735 => '⚔', 1736 => '⚕', 1737 => '⚖', 1738 => '⚗', 1739 => '⚘', 1740 => '⚙', 1741 => '⚚', 1742 => '⚛', 1743 => '⚜', 1744 => '⚝', 1745 => '⚞', 1746 => '⚟', 1747 => '⚠', 1748 => '⚡', 1749 => '⚢', 1750 => '⚣', 1751 => '⚤', 1752 => '⚥', 1753 => '⚦', 1754 => '⚧', 1755 => '⚨', 1756 => '⚩', 1757 => '⚪', 1758 => '⚫', 1759 => '⚬', 1760 => '⚭', 1761 => '⚮', 1762 => '⚯', 1763 => '⚰', 1764 => '⚱', 1765 => '⚲', 1766 => '⚳', 1767 => '⚴', 1768 => '⚵', 1769 => '⚶', 1770 => '⚷', 1771 => '⚸', 1772 => '⚹', 1773 => '⚺', 1774 => '⚻', 1775 => '⚼', 1776 => '⚽', 1777 => '⚾', 1778 => '⚿', 1779 => '⛀', 1780 => '⛁', 1781 => '⛂', 1782 => '⛃', 1783 => '⛄', 1784 => '⛅', 1785 => '⛆', 1786 => '⛇', 1787 => '⛈', 1788 => '⛉', 1789 => '⛊', 1790 => '⛋', 1791 => '⛌', 1792 => '⛍', 1793 => '⛎', 1794 
=> '⛏', 1795 => '⛐', 1796 => '⛑', 1797 => '⛒', 1798 => '⛓', 1799 => '⛔', 1800 => '⛕', 1801 => '⛖', 1802 => '⛗', 1803 => '⛘', 1804 => '⛙', 1805 => '⛚', 1806 => '⛛', 1807 => '⛜', 1808 => '⛝', 1809 => '⛞', 1810 => '⛟', 1811 => '⛠', 1812 => '⛡', 1813 => '⛢', 1814 => '⛣', 1815 => '⛤', 1816 => '⛥', 1817 => '⛦', 1818 => '⛧', 1819 => '⛨', 1820 => '⛩', 1821 => '⛪', 1822 => '⛫', 1823 => '⛬', 1824 => '⛭', 1825 => '⛮', 1826 => '⛯', 1827 => '⛰', 1828 => '⛱', 1829 => '⛲', 1830 => '⛳', 1831 => '⛴', 1832 => '⛵', 1833 => '⛶', 1834 => '⛷', 1835 => '⛸', 1836 => '⛹', 1837 => '⛺', 1838 => '⛻', 1839 => '⛼', 1840 => '⛽', 1841 => '⛾', 1842 => '⛿', 1843 => '✁', 1844 => '✂', 1845 => '✃', 1846 => '✄', 1847 => '✅', 1848 => '✆', 1849 => '✇', 1850 => '✈', 1851 => '✉', 1852 => '✊', 1853 => '✋', 1854 => '✌', 1855 => '✍', 1856 => '✎', 1857 => '✏', 1858 => '✐', 1859 => '✑', 1860 => '✒', 1861 => '✓', 1862 => '✔', 1863 => '✕', 1864 => '✖', 1865 => '✗', 1866 => '✘', 1867 => '✙', 1868 => '✚', 1869 => '✛', 1870 => '✜', 1871 => '✝', 1872 => '✞', 1873 => '✟', 1874 => '✠', 1875 => '✡', 1876 => '✢', 1877 => '✣', 1878 => '✤', 1879 => '✥', 1880 => '✦', 1881 => '✧', 1882 => '✨', 1883 => '✩', 1884 => '✪', 1885 => '✫', 1886 => '✬', 1887 => '✭', 1888 => '✮', 1889 => '✯', 1890 => '✰', 1891 => '✱', 1892 => '✲', 1893 => '✳', 1894 => '✴', 1895 => '✵', 1896 => '✶', 1897 => '✷', 1898 => '✸', 1899 => '✹', 1900 => '✺', 1901 => '✻', 1902 => '✼', 1903 => '✽', 1904 => '✾', 1905 => '✿', 1906 => '❀', 1907 => '❁', 1908 => '❂', 1909 => '❃', 1910 => '❄', 1911 => '❅', 1912 => '❆', 1913 => '❇', 1914 => '❈', 1915 => '❉', 1916 => '❊', 1917 => '❋', 1918 => '❌', 1919 => '❍', 1920 => '❎', 1921 => '❏', 1922 => '❐', 1923 => '❑', 1924 => '❒', 1925 => '❓', 1926 => '❔', 1927 => '❕', 1928 => '❖', 1929 => '❗', 1930 => '❘', 1931 => '❙', 1932 => '❚', 1933 => '❛', 1934 => '❜', 1935 => '❝', 1936 => '❞', 1937 => '❟', 1938 => '❠', 1939 => '❡', 1940 => '❢', 1941 => '❣', 1942 => '❤', 1943 => '❥', 1944 => '❦', 1945 => '❧', 1946 => '❨', 1947 => '❩', 
1948 => '❪', 1949 => '❫', 1950 => '❬', 1951 => '❭', 1952 => '❮', 1953 => '❯', 1954 => '❰', 1955 => '❱', 1956 => '❲', 1957 => '❳', 1958 => '❴', 1959 => '❵', 1960 => '➔', 1961 => '➕', 1962 => '➖', 1963 => '➗', 1964 => '➘', 1965 => '➙', 1966 => '➚', 1967 => '➛', 1968 => '➜', 1969 => '➝', 1970 => '➞', 1971 => '➟', 1972 => '➠', 1973 => '➡', 1974 => '➢', 1975 => '➣', 1976 => '➤', 1977 => '➥', 1978 => '➦', 1979 => '➧', 1980 => '➨', 1981 => '➩', 1982 => '➪', 1983 => '➫', 1984 => '➬', 1985 => '➭', 1986 => '➮', 1987 => '➯', 1988 => '➰', 1989 => '➱', 1990 => '➲', 1991 => '➳', 1992 => '➴', 1993 => '➵', 1994 => '➶', 1995 => '➷', 1996 => '➸', 1997 => '➹', 1998 => '➺', 1999 => '➻', 2000 => '➼', 2001 => '➽', 2002 => '➾', 2003 => '➿', 2004 => '⟀', 2005 => '⟁', 2006 => '⟂', 2007 => '⟃', 2008 => '⟄', 2009 => '⟅', 2010 => '⟆', 2011 => '⟇', 2012 => '⟈', 2013 => '⟉', 2014 => '⟊', 2015 => '⟌', 2016 => '⟎', 2017 => '⟏', 2018 => '⟐', 2019 => '⟑', 2020 => '⟒', 2021 => '⟓', 2022 => '⟔', 2023 => '⟕', 2024 => '⟖', 2025 => '⟗', 2026 => '⟘', 2027 => '⟙', 2028 => '⟚', 2029 => '⟛', 2030 => '⟜', 2031 => '⟝', 2032 => '⟞', 2033 => '⟟', 2034 => '⟠', 2035 => '⟡', 2036 => '⟢', 2037 => '⟣', 2038 => '⟤', 2039 => '⟥', 2040 => '⟦', 2041 => '⟧', 2042 => '⟨', 2043 => '⟩', 2044 => '⟪', 2045 => '⟫', 2046 => '⟰', 2047 => '⟱', 2048 => '⟲', 2049 => '⟳', 2050 => '⟴', 2051 => '⟵', 2052 => '⟶', 2053 => '⟷', 2054 => '⟸', 2055 => '⟹', 2056 => '⟺', 2057 => '⟻', 2058 => '⟼', 2059 => '⟽', 2060 => '⟾', 2061 => '⟿', 2062 => '⤀', 2063 => '⤁', 2064 => '⤂', 2065 => '⤃', 2066 => '⤄', 2067 => '⤅', 2068 => '⤆', 2069 => '⤇', 2070 => '⤈', 2071 => '⤉', 2072 => '⤊', 2073 => '⤋', 2074 => '⤌', 2075 => '⤍', 2076 => '⤎', 2077 => '⤏', 2078 => '⤐', 2079 => '⤑', 2080 => '⤒', 2081 => '⤓', 2082 => '⤔', 2083 => '⤕', 2084 => '⤖', 2085 => '⤗', 2086 => '⤘', 2087 => '⤙', 2088 => '⤚', 2089 => '⤛', 2090 => '⤜', 2091 => '⤝', 2092 => '⤞', 2093 => '⤟', 2094 => '⤠', 2095 => '⤡', 2096 => '⤢', 2097 => '⤣', 2098 => '⤤', 2099 => '⤥', 2100 => '⤦', 2101 => 
'⤧', 2102 => '⤨', 2103 => '⤩', 2104 => '⤪', 2105 => '⤫', 2106 => '⤬', 2107 => '⤭', 2108 => '⤮', 2109 => '⤯', 2110 => '⤰', 2111 => '⤱', 2112 => '⤲', 2113 => '⤳', 2114 => '⤴', 2115 => '⤵', 2116 => '⤶', 2117 => '⤷', 2118 => '⤸', 2119 => '⤹', 2120 => '⤺', 2121 => '⤻', 2122 => '⤼', 2123 => '⤽', 2124 => '⤾', 2125 => '⤿', 2126 => '⥀', 2127 => '⥁', 2128 => '⥂', 2129 => '⥃', 2130 => '⥄', 2131 => '⥅', 2132 => '⥆', 2133 => '⥇', 2134 => '⥈', 2135 => '⥉', 2136 => '⥊', 2137 => '⥋', 2138 => '⥌', 2139 => '⥍', 2140 => '⥎', 2141 => '⥏', 2142 => '⥐', 2143 => '⥑', 2144 => '⥒', 2145 => '⥓', 2146 => '⥔', 2147 => '⥕', 2148 => '⥖', 2149 => '⥗', 2150 => '⥘', 2151 => '⥙', 2152 => '⥚', 2153 => '⥛', 2154 => '⥜', 2155 => '⥝', 2156 => '⥞', 2157 => '⥟', 2158 => '⥠', 2159 => '⥡', 2160 => '⥢', 2161 => '⥣', 2162 => '⥤', 2163 => '⥥', 2164 => '⥦', 2165 => '⥧', 2166 => '⥨', 2167 => '⥩', 2168 => '⥪', 2169 => '⥫', 2170 => '⥬', 2171 => '⥭', 2172 => '⥮', 2173 => '⥯', 2174 => '⥰', 2175 => '⥱', 2176 => '⥲', 2177 => '⥳', 2178 => '⥴', 2179 => '⥵', 2180 => '⥶', 2181 => '⥷', 2182 => '⥸', 2183 => '⥹', 2184 => '⥺', 2185 => '⥻', 2186 => '⥼', 2187 => '⥽', 2188 => '⥾', 2189 => '⥿', 2190 => '⦀', 2191 => '⦁', 2192 => '⦂', 2193 => '⦙', 2194 => '⦚', 2195 => '⦛', 2196 => '⦜', 2197 => '⦝', 2198 => '⦞', 2199 => '⦟', 2200 => '⦠', 2201 => '⦡', 2202 => '⦢', 2203 => '⦣', 2204 => '⦤', 2205 => '⦥', 2206 => '⦦', 2207 => '⦧', 2208 => '⦨', 2209 => '⦩', 2210 => '⦪', 2211 => '⦫', 2212 => '⦬', 2213 => '⦭', 2214 => '⦮', 2215 => '⦯', 2216 => '⦰', 2217 => '⦱', 2218 => '⦲', 2219 => '⦳', 2220 => '⦴', 2221 => '⦵', 2222 => '⦶', 2223 => '⦷', 2224 => '⦸', 2225 => '⦹', 2226 => '⦺', 2227 => '⦻', 2228 => '⦼', 2229 => '⦽', 2230 => '⦾', 2231 => '⦿', 2232 => '⧀', 2233 => '⧁', 2234 => '⧂', 2235 => '⧃', 2236 => '⧄', 2237 => '⧅', 2238 => '⧆', 2239 => '⧇', 2240 => '⧈', 2241 => '⧉', 2242 => '⧊', 2243 => '⧋', 2244 => '⧌', 2245 => '⧍', 2246 => '⧎', 2247 => '⧏', 2248 => '⧐', 2249 => '⧑', 2250 => '⧒', 2251 => '⧓', 2252 => '⧔', 2253 => '⧕', 2254 => '⧖', 2255 
=> '⧗', 2256 => '⧘', 2257 => '⧙', 2258 => '⧚', 2259 => '⧛', 2260 => '⧜', 2261 => '⧝', 2262 => '⧞', 2263 => '⧟', 2264 => '⧠', 2265 => '⧡', 2266 => '⧢', 2267 => '⧣', 2268 => '⧤', 2269 => '⧥', 2270 => '⧦', 2271 => '⧧', 2272 => '⧨', 2273 => '⧩', 2274 => '⧪', 2275 => '⧫', 2276 => '⧬', 2277 => '⧭', 2278 => '⧮', 2279 => '⧯', 2280 => '⧰', 2281 => '⧱', 2282 => '⧲', 2283 => '⧳', 2284 => '⧴', 2285 => '⧵', 2286 => '⧶', 2287 => '⧷', 2288 => '⧸', 2289 => '⧹', 2290 => '⧺', 2291 => '⧻', 2292 => '⧾', 2293 => '⧿', 2294 => '⨀', 2295 => '⨁', 2296 => '⨂', 2297 => '⨃', 2298 => '⨄', 2299 => '⨅', 2300 => '⨆', 2301 => '⨇', 2302 => '⨈', 2303 => '⨉', 2304 => '⨊', 2305 => '⨋', 2306 => '⨍', 2307 => '⨎', 2308 => '⨏', 2309 => '⨐', 2310 => '⨑', 2311 => '⨒', 2312 => '⨓', 2313 => '⨔', 2314 => '⨕', 2315 => '⨖', 2316 => '⨗', 2317 => '⨘', 2318 => '⨙', 2319 => '⨚', 2320 => '⨛', 2321 => '⨜', 2322 => '⨝', 2323 => '⨞', 2324 => '⨟', 2325 => '⨠', 2326 => '⨡', 2327 => '⨢', 2328 => '⨣', 2329 => '⨤', 2330 => '⨥', 2331 => '⨦', 2332 => '⨧', 2333 => '⨨', 2334 => '⨩', 2335 => '⨪', 2336 => '⨫', 2337 => '⨬', 2338 => '⨭', 2339 => '⨮', 2340 => '⨯', 2341 => '⨰', 2342 => '⨱', 2343 => '⨲', 2344 => '⨳', 2345 => '⨴', 2346 => '⨵', 2347 => '⨶', 2348 => '⨷', 2349 => '⨸', 2350 => '⨹', 2351 => '⨺', 2352 => '⨻', 2353 => '⨼', 2354 => '⨽', 2355 => '⨾', 2356 => '⨿', 2357 => '⩀', 2358 => '⩁', 2359 => '⩂', 2360 => '⩃', 2361 => '⩄', 2362 => '⩅', 2363 => '⩆', 2364 => '⩇', 2365 => '⩈', 2366 => '⩉', 2367 => '⩊', 2368 => '⩋', 2369 => '⩌', 2370 => '⩍', 2371 => '⩎', 2372 => '⩏', 2373 => '⩐', 2374 => '⩑', 2375 => '⩒', 2376 => '⩓', 2377 => '⩔', 2378 => '⩕', 2379 => '⩖', 2380 => '⩗', 2381 => '⩘', 2382 => '⩙', 2383 => '⩚', 2384 => '⩛', 2385 => '⩜', 2386 => '⩝', 2387 => '⩞', 2388 => '⩟', 2389 => '⩠', 2390 => '⩡', 2391 => '⩢', 2392 => '⩣', 2393 => '⩤', 2394 => '⩥', 2395 => '⩦', 2396 => '⩧', 2397 => '⩨', 2398 => '⩩', 2399 => '⩪', 2400 => '⩫', 2401 => '⩬', 2402 => '⩭', 2403 => '⩮', 2404 => '⩯', 2405 => '⩰', 2406 => '⩱', 2407 => '⩲', 2408 => '⩳', 
2409 => '⩷', 2410 => '⩸', 2411 => '⩹', 2412 => '⩺', 2413 => '⩻', 2414 => '⩼', 2415 => '⩽', 2416 => '⩾', 2417 => '⩿', 2418 => '⪀', 2419 => '⪁', 2420 => '⪂', 2421 => '⪃', 2422 => '⪄', 2423 => '⪅', 2424 => '⪆', 2425 => '⪇', 2426 => '⪈', 2427 => '⪉', 2428 => '⪊', 2429 => '⪋', 2430 => '⪌', 2431 => '⪍', 2432 => '⪎', 2433 => '⪏', 2434 => '⪐', 2435 => '⪑', 2436 => '⪒', 2437 => '⪓', 2438 => '⪔', 2439 => '⪕', 2440 => '⪖', 2441 => '⪗', 2442 => '⪘', 2443 => '⪙', 2444 => '⪚', 2445 => '⪛', 2446 => '⪜', 2447 => '⪝', 2448 => '⪞', 2449 => '⪟', 2450 => '⪠', 2451 => '⪡', 2452 => '⪢', 2453 => '⪣', 2454 => '⪤', 2455 => '⪥', 2456 => '⪦', 2457 => '⪧', 2458 => '⪨', 2459 => '⪩', 2460 => '⪪', 2461 => '⪫', 2462 => '⪬', 2463 => '⪭', 2464 => '⪮', 2465 => '⪯', 2466 => '⪰', 2467 => '⪱', 2468 => '⪲', 2469 => '⪳', 2470 => '⪴', 2471 => '⪵', 2472 => '⪶', 2473 => '⪷', 2474 => '⪸', 2475 => '⪹', 2476 => '⪺', 2477 => '⪻', 2478 => '⪼', 2479 => '⪽', 2480 => '⪾', 2481 => '⪿', 2482 => '⫀', 2483 => '⫁', 2484 => '⫂', 2485 => '⫃', 2486 => '⫄', 2487 => '⫅', 2488 => '⫆', 2489 => '⫇', 2490 => '⫈', 2491 => '⫉', 2492 => '⫊', 2493 => '⫋', 2494 => '⫌', 2495 => '⫍', 2496 => '⫎', 2497 => '⫏', 2498 => '⫐', 2499 => '⫑', 2500 => '⫒', 2501 => '⫓', 2502 => '⫔', 2503 => '⫕', 2504 => '⫖', 2505 => '⫗', 2506 => '⫘', 2507 => '⫙', 2508 => '⫚', 2509 => '⫛', 2510 => '⫝', 2511 => '⫞', 2512 => '⫟', 2513 => '⫠', 2514 => '⫡', 2515 => '⫢', 2516 => '⫣', 2517 => '⫤', 2518 => '⫥', 2519 => '⫦', 2520 => '⫧', 2521 => '⫨', 2522 => '⫩', 2523 => '⫪', 2524 => '⫫', 2525 => '⫬', 2526 => '⫭', 2527 => '⫮', 2528 => '⫯', 2529 => '⫰', 2530 => '⫱', 2531 => '⫲', 2532 => '⫳', 2533 => '⫴', 2534 => '⫵', 2535 => '⫶', 2536 => '⫷', 2537 => '⫸', 2538 => '⫹', 2539 => '⫺', 2540 => '⫻', 2541 => '⫼', 2542 => '⫽', 2543 => '⫾', 2544 => '⫿', 2545 => '⬀', 2546 => '⬁', 2547 => '⬂', 2548 => '⬃', 2549 => '⬄', 2550 => '⬅', 2551 => '⬆', 2552 => '⬇', 2553 => '⬈', 2554 => '⬉', 2555 => '⬊', 2556 => '⬋', 2557 => '⬌', 2558 => '⬍', 2559 => '⬎', 2560 => '⬏', 2561 => '⬐', 2562 => 
'⬑', 2563 => '⬒', 2564 => '⬓', 2565 => '⬔', 2566 => '⬕', 2567 => '⬖', 2568 => '⬗', 2569 => '⬘', 2570 => '⬙', 2571 => '⬚', 2572 => '⬛', 2573 => '⬜', 2574 => '⬝', 2575 => '⬞', 2576 => '⬟', 2577 => '⬠', 2578 => '⬡', 2579 => '⬢', 2580 => '⬣', 2581 => '⬤', 2582 => '⬥', 2583 => '⬦', 2584 => '⬧', 2585 => '⬨', 2586 => '⬩', 2587 => '⬪', 2588 => '⬫', 2589 => '⬬', 2590 => '⬭', 2591 => '⬮', 2592 => '⬯', 2593 => '⬰', 2594 => '⬱', 2595 => '⬲', 2596 => '⬳', 2597 => '⬴', 2598 => '⬵', 2599 => '⬶', 2600 => '⬷', 2601 => '⬸', 2602 => '⬹', 2603 => '⬺', 2604 => '⬻', 2605 => '⬼', 2606 => '⬽', 2607 => '⬾', 2608 => '⬿', 2609 => '⭀', 2610 => '⭁', 2611 => '⭂', 2612 => '⭃', 2613 => '⭄', 2614 => '⭅', 2615 => '⭆', 2616 => '⭇', 2617 => '⭈', 2618 => '⭉', 2619 => '⭊', 2620 => '⭋', 2621 => '⭌', 2622 => '⭐', 2623 => '⭑', 2624 => '⭒', 2625 => '⭓', 2626 => '⭔', 2627 => '⭕', 2628 => '⭖', 2629 => '⭗', 2630 => '⭘', 2631 => '⭙', 2632 => '⳥', 2633 => '⳦', 2634 => '⳧', 2635 => '⳨', 2636 => '⳩', 2637 => '⳪', 2638 => '⠀', 2639 => '⠁', 2640 => '⠂', 2641 => '⠃', 2642 => '⠄', 2643 => '⠅', 2644 => '⠆', 2645 => '⠇', 2646 => '⠈', 2647 => '⠉', 2648 => '⠊', 2649 => '⠋', 2650 => '⠌', 2651 => '⠍', 2652 => '⠎', 2653 => '⠏', 2654 => '⠐', 2655 => '⠑', 2656 => '⠒', 2657 => '⠓', 2658 => '⠔', 2659 => '⠕', 2660 => '⠖', 2661 => '⠗', 2662 => '⠘', 2663 => '⠙', 2664 => '⠚', 2665 => '⠛', 2666 => '⠜', 2667 => '⠝', 2668 => '⠞', 2669 => '⠟', 2670 => '⠠', 2671 => '⠡', 2672 => '⠢', 2673 => '⠣', 2674 => '⠤', 2675 => '⠥', 2676 => '⠦', 2677 => '⠧', 2678 => '⠨', 2679 => '⠩', 2680 => '⠪', 2681 => '⠫', 2682 => '⠬', 2683 => '⠭', 2684 => '⠮', 2685 => '⠯', 2686 => '⠰', 2687 => '⠱', 2688 => '⠲', 2689 => '⠳', 2690 => '⠴', 2691 => '⠵', 2692 => '⠶', 2693 => '⠷', 2694 => '⠸', 2695 => '⠹', 2696 => '⠺', 2697 => '⠻', 2698 => '⠼', 2699 => '⠽', 2700 => '⠾', 2701 => '⠿', 2702 => '⡀', 2703 => '⡁', 2704 => '⡂', 2705 => '⡃', 2706 => '⡄', 2707 => '⡅', 2708 => '⡆', 2709 => '⡇', 2710 => '⡈', 2711 => '⡉', 2712 => '⡊', 2713 => '⡋', 2714 => '⡌', 2715 => '⡍', 2716 
=> '⡎', 2717 => '⡏', 2718 => '⡐', 2719 => '⡑', 2720 => '⡒', 2721 => '⡓', 2722 => '⡔', 2723 => '⡕', 2724 => '⡖', 2725 => '⡗', 2726 => '⡘', 2727 => '⡙', 2728 => '⡚', 2729 => '⡛', 2730 => '⡜', 2731 => '⡝', 2732 => '⡞', 2733 => '⡟', 2734 => '⡠', 2735 => '⡡', 2736 => '⡢', 2737 => '⡣', 2738 => '⡤', 2739 => '⡥', 2740 => '⡦', 2741 => '⡧', 2742 => '⡨', 2743 => '⡩', 2744 => '⡪', 2745 => '⡫', 2746 => '⡬', 2747 => '⡭', 2748 => '⡮', 2749 => '⡯', 2750 => '⡰', 2751 => '⡱', 2752 => '⡲', 2753 => '⡳', 2754 => '⡴', 2755 => '⡵', 2756 => '⡶', 2757 => '⡷', 2758 => '⡸', 2759 => '⡹', 2760 => '⡺', 2761 => '⡻', 2762 => '⡼', 2763 => '⡽', 2764 => '⡾', 2765 => '⡿', 2766 => '⢀', 2767 => '⢁', 2768 => '⢂', 2769 => '⢃', 2770 => '⢄', 2771 => '⢅', 2772 => '⢆', 2773 => '⢇', 2774 => '⢈', 2775 => '⢉', 2776 => '⢊', 2777 => '⢋', 2778 => '⢌', 2779 => '⢍', 2780 => '⢎', 2781 => '⢏', 2782 => '⢐', 2783 => '⢑', 2784 => '⢒', 2785 => '⢓', 2786 => '⢔', 2787 => '⢕', 2788 => '⢖', 2789 => '⢗', 2790 => '⢘', 2791 => '⢙', 2792 => '⢚', 2793 => '⢛', 2794 => '⢜', 2795 => '⢝', 2796 => '⢞', 2797 => '⢟', 2798 => '⢠', 2799 => '⢡', 2800 => '⢢', 2801 => '⢣', 2802 => '⢤', 2803 => '⢥', 2804 => '⢦', 2805 => '⢧', 2806 => '⢨', 2807 => '⢩', 2808 => '⢪', 2809 => '⢫', 2810 => '⢬', 2811 => '⢭', 2812 => '⢮', 2813 => '⢯', 2814 => '⢰', 2815 => '⢱', 2816 => '⢲', 2817 => '⢳', 2818 => '⢴', 2819 => '⢵', 2820 => '⢶', 2821 => '⢷', 2822 => '⢸', 2823 => '⢹', 2824 => '⢺', 2825 => '⢻', 2826 => '⢼', 2827 => '⢽', 2828 => '⢾', 2829 => '⢿', 2830 => '⣀', 2831 => '⣁', 2832 => '⣂', 2833 => '⣃', 2834 => '⣄', 2835 => '⣅', 2836 => '⣆', 2837 => '⣇', 2838 => '⣈', 2839 => '⣉', 2840 => '⣊', 2841 => '⣋', 2842 => '⣌', 2843 => '⣍', 2844 => '⣎', 2845 => '⣏', 2846 => '⣐', 2847 => '⣑', 2848 => '⣒', 2849 => '⣓', 2850 => '⣔', 2851 => '⣕', 2852 => '⣖', 2853 => '⣗', 2854 => '⣘', 2855 => '⣙', 2856 => '⣚', 2857 => '⣛', 2858 => '⣜', 2859 => '⣝', 2860 => '⣞', 2861 => '⣟', 2862 => '⣠', 2863 => '⣡', 2864 => '⣢', 2865 => '⣣', 2866 => '⣤', 2867 => '⣥', 2868 => '⣦', 2869 => '⣧', 
2870 => '⣨', 2871 => '⣩', 2872 => '⣪', 2873 => '⣫', 2874 => '⣬', 2875 => '⣭', 2876 => '⣮', 2877 => '⣯', 2878 => '⣰', 2879 => '⣱', 2880 => '⣲', 2881 => '⣳', 2882 => '⣴', 2883 => '⣵', 2884 => '⣶', 2885 => '⣷', 2886 => '⣸', 2887 => '⣹', 2888 => '⣺', 2889 => '⣻', 2890 => '⣼', 2891 => '⣽', 2892 => '⣾', 2893 => '⣿', 2894 => '⚊', 2895 => '⚋', 2896 => '⚌', 2897 => '⚍', 2898 => '⚎', 2899 => '⚏', 2900 => '☰', 2901 => '☱', 2902 => '☲', 2903 => '☳', 2904 => '☴', 2905 => '☵', 2906 => '☶', 2907 => '☷', 2908 => '䷀', 2909 => '䷁', 2910 => '䷂', 2911 => '䷃', 2912 => '䷄', 2913 => '䷅', 2914 => '䷆', 2915 => '䷇', 2916 => '䷈', 2917 => '䷉', 2918 => '䷊', 2919 => '䷋', 2920 => '䷌', 2921 => '䷍', 2922 => '䷎', 2923 => '䷏', 2924 => '䷐', 2925 => '䷑', 2926 => '䷒', 2927 => '䷓', 2928 => '䷔', 2929 => '䷕', 2930 => '䷖', 2931 => '䷗', 2932 => '䷘', 2933 => '䷙', 2934 => '䷚', 2935 => '䷛', 2936 => '䷜', 2937 => '䷝', 2938 => '䷞', 2939 => '䷟', 2940 => '䷠', 2941 => '䷡', 2942 => '䷢', 2943 => '䷣', 2944 => '䷤', 2945 => '䷥', 2946 => '䷦', 2947 => '䷧', 2948 => '䷨', 2949 => '䷩', 2950 => '䷪', 2951 => '䷫', 2952 => '䷬', 2953 => '䷭', 2954 => '䷮', 2955 => '䷯', 2956 => '䷰', 2957 => '䷱', 2958 => '䷲', 2959 => '䷳', 2960 => '䷴', 2961 => '䷵', 2962 => '䷶', 2963 => '䷷', 2964 => '䷸', 2965 => '䷹', 2966 => '䷺', 2967 => '䷻', 2968 => '䷼', 2969 => '䷽', 2970 => '䷾', 2971 => '䷿', 2972 => '𝌀', 2973 => '𝌁', 2974 => '𝌂', 2975 => '𝌃', 2976 => '𝌄', 2977 => '𝌅', 2978 => '𝌆', 2979 => '𝌇', 2980 => '𝌈', 2981 => '𝌉', 2982 => '𝌊', 2983 => '𝌋', 2984 => '𝌌', 2985 => '𝌍', 2986 => '𝌎', 2987 => '𝌏', 2988 => '𝌐', 2989 => '𝌑', 2990 => '𝌒', 2991 => '𝌓', 2992 => '𝌔', 2993 => '𝌕', 2994 => '𝌖', 2995 => '𝌗', 2996 => '𝌘', 2997 => '𝌙', 2998 => '𝌚', 2999 => '𝌛', 3000 => '𝌜', 3001 => '𝌝', 3002 => '𝌞', 3003 => '𝌟', 3004 => '𝌠', 3005 => '𝌡', 3006 => '𝌢', 3007 => '𝌣', 3008 => '𝌤', 3009 => '𝌥', 3010 => '𝌦', 3011 => '𝌧', 3012 => '𝌨', 3013 => '𝌩', 3014 => '𝌪', 3015 => '𝌫', 3016 => '𝌬', 3017 => '𝌭', 3018 => '𝌮', 3019 => '𝌯', 3020 => '𝌰', 3021 => '𝌱', 3022 => '𝌲', 3023 => 
'𝌳', 3024 => '𝌴', 3025 => '𝌵', 3026 => '𝌶', 3027 => '𝌷', 3028 => '𝌸', 3029 => '𝌹', 3030 => '𝌺', 3031 => '𝌻', 3032 => '𝌼', 3033 => '𝌽', 3034 => '𝌾', 3035 => '𝌿', 3036 => '𝍀', 3037 => '𝍁', 3038 => '𝍂', 3039 => '𝍃', 3040 => '𝍄', 3041 => '𝍅', 3042 => '𝍆', 3043 => '𝍇', 3044 => '𝍈', 3045 => '𝍉', 3046 => '𝍊', 3047 => '𝍋', 3048 => '𝍌', 3049 => '𝍍', 3050 => '𝍎', 3051 => '𝍏', 3052 => '𝍐', 3053 => '𝍑', 3054 => '𝍒', 3055 => '𝍓', 3056 => '𝍔', 3057 => '𝍕', 3058 => '𝍖', 3059 => '꒐', 3060 => '꒑', 3061 => '꒒', 3062 => '꒓', 3063 => '꒔', 3064 => '꒕', 3065 => '꒖', 3066 => '꒗', 3067 => '꒘', 3068 => '꒙', 3069 => '꒚', 3070 => '꒛', 3071 => '꒜', 3072 => '꒝', 3073 => '꒞', 3074 => '꒟', 3075 => '꒠', 3076 => '꒡', 3077 => '꒢', 3078 => '꒣', 3079 => '꒤', 3080 => '꒥', 3081 => '꒦', 3082 => '꒧', 3083 => '꒨', 3084 => '꒩', 3085 => '꒪', 3086 => '꒫', 3087 => '꒬', 3088 => '꒭', 3089 => '꒮', 3090 => '꒯', 3091 => '꒰', 3092 => '꒱', 3093 => '꒲', 3094 => '꒳', 3095 => '꒴', 3096 => '꒵', 3097 => '꒶', 3098 => '꒷', 3099 => '꒸', 3100 => '꒹', 3101 => '꒺', 3102 => '꒻', 3103 => '꒼', 3104 => '꒽', 3105 => '꒾', 3106 => '꒿', 3107 => '꓀', 3108 => '꓁', 3109 => '꓂', 3110 => '꓃', 3111 => '꓄', 3112 => '꓅', 3113 => '꓆', 3114 => '𐄷', 3115 => '𐄸', 3116 => '𐄹', 3117 => '𐄺', 3118 => '𐄻', 3119 => '𐄼', 3120 => '𐄽', 3121 => '𐄾', 3122 => '𐄿', 3123 => '𐅹', 3124 => '𐅺', 3125 => '𐅻', 3126 => '𐅼', 3127 => '𐅽', 3128 => '𐅾', 3129 => '𐅿', 3130 => '𐆀', 3131 => '𐆁', 3132 => '𐆂', 3133 => '𐆃', 3134 => '𐆄', 3135 => '𐆅', 3136 => '𐆆', 3137 => '𐆇', 3138 => '𐆈', 3139 => '𐆉', 3140 => '𐆐', 3141 => '𐆑', 3142 => '𐆒', 3143 => '𐆓', 3144 => '𐆔', 3145 => '𐆕', 3146 => '𐆖', 3147 => '𐆗', 3148 => '𐆘', 3149 => '𐆙', 3150 => '𐆚', 3151 => '𐆛', 3152 => '𐇐', 3153 => '𐇑', 3154 => '𐇒', 3155 => '𐇓', 3156 => '𐇔', 3157 => '𐇕', 3158 => '𐇖', 3159 => '𐇗', 3160 => '𐇘', 3161 => '𐇙', 3162 => '𐇚', 3163 => '𐇛', 3164 => '𐇜', 3165 => '𐇝', 3166 => '𐇞', 3167 => '𐇟', 3168 => '𐇠', 3169 => '𐇡', 3170 => '𐇢', 3171 => '𐇣', 3172 => '𐇤', 3173 => '𐇥', 3174 => '𐇦', 3175 => '𐇧', 3176 => '𐇨', 3177 
=> '𐇩', 3178 => '𐇪', 3179 => '𐇫', 3180 => '𐇬', 3181 => '𐇭', 3182 => '𐇮', 3183 => '𐇯', 3184 => '𐇰', 3185 => '𐇱', 3186 => '𐇲', 3187 => '𐇳', 3188 => '𐇴', 3189 => '𐇵', 3190 => '𐇶', 3191 => '𐇷', 3192 => '𐇸', 3193 => '𐇹', 3194 => '𐇺', 3195 => '𐇻', 3196 => '𐇼', 3197 => '𝀀', 3198 => '𝀁', 3199 => '𝀂', 3200 => '𝀃', 3201 => '𝀄', 3202 => '𝀅', 3203 => '𝀆', 3204 => '𝀇', 3205 => '𝀈', 3206 => '𝀉', 3207 => '𝀊', 3208 => '𝀋', 3209 => '𝀌', 3210 => '𝀍', 3211 => '𝀎', 3212 => '𝀏', 3213 => '𝀐', 3214 => '𝀑', 3215 => '𝀒', 3216 => '𝀓', 3217 => '𝀔', 3218 => '𝀕', 3219 => '𝀖', 3220 => '𝀗', 3221 => '𝀘', 3222 => '𝀙', 3223 => '𝀚', 3224 => '𝀛', 3225 => '𝀜', 3226 => '𝀝', 3227 => '𝀞', 3228 => '𝀟', 3229 => '𝀠', 3230 => '𝀡', 3231 => '𝀢', 3232 => '𝀣', 3233 => '𝀤', 3234 => '𝀥', 3235 => '𝀦', 3236 => '𝀧', 3237 => '𝀨', 3238 => '𝀩', 3239 => '𝀪', 3240 => '𝀫', 3241 => '𝀬', 3242 => '𝀭', 3243 => '𝀮', 3244 => '𝀯', 3245 => '𝀰', 3246 => '𝀱', 3247 => '𝀲', 3248 => '𝀳', 3249 => '𝀴', 3250 => '𝀵', 3251 => '𝀶', 3252 => '𝀷', 3253 => '𝀸', 3254 => '𝀹', 3255 => '𝀺', 3256 => '𝀻', 3257 => '𝀼', 3258 => '𝀽', 3259 => '𝀾', 3260 => '𝀿', 3261 => '𝁀', 3262 => '𝁁', 3263 => '𝁂', 3264 => '𝁃', 3265 => '𝁄', 3266 => '𝁅', 3267 => '𝁆', 3268 => '𝁇', 3269 => '𝁈', 3270 => '𝁉', 3271 => '𝁊', 3272 => '𝁋', 3273 => '𝁌', 3274 => '𝁍', 3275 => '𝁎', 3276 => '𝁏', 3277 => '𝁐', 3278 => '𝁑', 3279 => '𝁒', 3280 => '𝁓', 3281 => '𝁔', 3282 => '𝁕', 3283 => '𝁖', 3284 => '𝁗', 3285 => '𝁘', 3286 => '𝁙', 3287 => '𝁚', 3288 => '𝁛', 3289 => '𝁜', 3290 => '𝁝', 3291 => '𝁞', 3292 => '𝁟', 3293 => '𝁠', 3294 => '𝁡', 3295 => '𝁢', 3296 => '𝁣', 3297 => '𝁤', 3298 => '𝁥', 3299 => '𝁦', 3300 => '𝁧', 3301 => '𝁨', 3302 => '𝁩', 3303 => '𝁪', 3304 => '𝁫', 3305 => '𝁬', 3306 => '𝁭', 3307 => '𝁮', 3308 => '𝁯', 3309 => '𝁰', 3310 => '𝁱', 3311 => '𝁲', 3312 => '𝁳', 3313 => '𝁴', 3314 => '𝁵', 3315 => '𝁶', 3316 => '𝁷', 3317 => '𝁸', 3318 => '𝁹', 3319 => '𝁺', 3320 => '𝁻', 3321 => '𝁼', 3322 => '𝁽', 3323 => '𝁾', 3324 => '𝁿', 3325 => '𝂀', 3326 => '𝂁', 3327 => '𝂂', 3328 => '𝂃', 3329 => '𝂄', 3330 => '𝂅', 
3331 => '𝂆', 3332 => '𝂇', 3333 => '𝂈', 3334 => '𝂉', 3335 => '𝂊', 3336 => '𝂋', 3337 => '𝂌', 3338 => '𝂍', 3339 => '𝂎', 3340 => '𝂏', 3341 => '𝂐', 3342 => '𝂑', 3343 => '𝂒', 3344 => '𝂓', 3345 => '𝂔', 3346 => '𝂕', 3347 => '𝂖', 3348 => '𝂗', 3349 => '𝂘', 3350 => '𝂙', 3351 => '𝂚', 3352 => '𝂛', 3353 => '𝂜', 3354 => '𝂝', 3355 => '𝂞', 3356 => '𝂟', 3357 => '𝂠', 3358 => '𝂡', 3359 => '𝂢', 3360 => '𝂣', 3361 => '𝂤', 3362 => '𝂥', 3363 => '𝂦', 3364 => '𝂧', 3365 => '𝂨', 3366 => '𝂩', 3367 => '𝂪', 3368 => '𝂫', 3369 => '𝂬', 3370 => '𝂭', 3371 => '𝂮', 3372 => '𝂯', 3373 => '𝂰', 3374 => '𝂱', 3375 => '𝂲', 3376 => '𝂳', 3377 => '𝂴', 3378 => '𝂵', 3379 => '𝂶', 3380 => '𝂷', 3381 => '𝂸', 3382 => '𝂹', 3383 => '𝂺', 3384 => '𝂻', 3385 => '𝂼', 3386 => '𝂽', 3387 => '𝂾', 3388 => '𝂿', 3389 => '𝃀', 3390 => '𝃁', 3391 => '𝃂', 3392 => '𝃃', 3393 => '𝃄', 3394 => '𝃅', 3395 => '𝃆', 3396 => '𝃇', 3397 => '𝃈', 3398 => '𝃉', 3399 => '𝃊', 3400 => '𝃋', 3401 => '𝃌', 3402 => '𝃍', 3403 => '𝃎', 3404 => '𝃏', 3405 => '𝃐', 3406 => '𝃑', 3407 => '𝃒', 3408 => '𝃓', 3409 => '𝃔', 3410 => '𝃕', 3411 => '𝃖', 3412 => '𝃗', 3413 => '𝃘', 3414 => '𝃙', 3415 => '𝃚', 3416 => '𝃛', 3417 => '𝃜', 3418 => '𝃝', 3419 => '𝃞', 3420 => '𝃟', 3421 => '𝃠', 3422 => '𝃡', 3423 => '𝃢', 3424 => '𝃣', 3425 => '𝃤', 3426 => '𝃥', 3427 => '𝃦', 3428 => '𝃧', 3429 => '𝃨', 3430 => '𝃩', 3431 => '𝃪', 3432 => '𝃫', 3433 => '𝃬', 3434 => '𝃭', 3435 => '𝃮', 3436 => '𝃯', 3437 => '𝃰', 3438 => '𝃱', 3439 => '𝃲', 3440 => '𝃳', 3441 => '𝃴', 3442 => '𝃵', 3443 => '𝄀', 3444 => '𝄁', 3445 => '𝄂', 3446 => '𝄃', 3447 => '𝄄', 3448 => '𝄅', 3449 => '𝄆', 3450 => '𝄇', 3451 => '𝄈', 3452 => '𝄉', 3453 => '𝄊', 3454 => '𝄋', 3455 => '𝄌', 3456 => '𝄍', 3457 => '𝄎', 3458 => '𝄏', 3459 => '𝄐', 3460 => '𝄑', 3461 => '𝄒', 3462 => '𝄓', 3463 => '𝄔', 3464 => '𝄕', 3465 => '𝄖', 3466 => '𝄗', 3467 => '𝄘', 3468 => '𝄙', 3469 => '𝄚', 3470 => '𝄛', 3471 => '𝄜', 3472 => '𝄝', 3473 => '𝄞', 3474 => '𝄟', 3475 => '𝄠', 3476 => '𝄡', 3477 => '𝄢', 3478 => '𝄣', 3479 => '𝄤', 3480 => '𝄥', 3481 => '𝄦', 3482 => '♭', 3483 => '♮', 3484 => 
'♯', 3485 => '𝄪', 3486 => '𝄫', 3487 => '𝄬', 3488 => '𝄭', 3489 => '𝄮', 3490 => '𝄯', 3491 => '𝄰', 3492 => '𝄱', 3493 => '𝄲', 3494 => '𝄳', 3495 => '𝄴', 3496 => '𝄵', 3497 => '𝄶', 3498 => '𝄷', 3499 => '𝄸', 3500 => '𝄹', 3501 => '𝄩', 3502 => '𝄺', 3503 => '𝄻', 3504 => '𝄼', 3505 => '𝄽', 3506 => '𝄾', 3507 => '𝄿', 3508 => '𝅀', 3509 => '𝅁', 3510 => '𝅂', 3511 => '𝅃', 3512 => '𝅄', 3513 => '𝅅', 3514 => '𝅆', 3515 => '𝅇', 3516 => '𝅈', 3517 => '𝅉', 3518 => '𝅊', 3519 => '𝅋', 3520 => '𝅌', 3521 => '𝅍', 3522 => '𝅎', 3523 => '𝅏', 3524 => '𝅐', 3525 => '𝅑', 3526 => '𝅒', 3527 => '𝅓', 3528 => '𝅔', 3529 => '𝅕', 3530 => '𝅖', 3531 => '𝅗', 3532 => '𝅘', 3533 => '𝅙', 3534 => '𝅚', 3535 => '𝅛', 3536 => '𝅜', 3537 => '𝅝', 3538 => '𝅪', 3539 => '𝅫', 3540 => '𝅬', 3541 => '𝆃', 3542 => '𝆄', 3543 => '𝆌', 3544 => '𝆍', 3545 => '𝆎', 3546 => '𝆏', 3547 => '𝆐', 3548 => '𝆑', 3549 => '𝆒', 3550 => '𝆓', 3551 => '𝆔', 3552 => '𝆕', 3553 => '𝆖', 3554 => '𝆗', 3555 => '𝆘', 3556 => '𝆙', 3557 => '𝆚', 3558 => '𝆛', 3559 => '𝆜', 3560 => '𝆝', 3561 => '𝆞', 3562 => '𝆟', 3563 => '𝆠', 3564 => '𝆡', 3565 => '𝆢', 3566 => '𝆣', 3567 => '𝆤', 3568 => '𝆥', 3569 => '𝆦', 3570 => '𝆧', 3571 => '𝆨', 3572 => '𝆩', 3573 => '𝆮', 3574 => '𝆯', 3575 => '𝆰', 3576 => '𝆱', 3577 => '𝆲', 3578 => '𝆳', 3579 => '𝆴', 3580 => '𝆵', 3581 => '𝆶', 3582 => '𝆷', 3583 => '𝆸', 3584 => '𝆹', 3585 => '𝆺', 3586 => '𝇁', 3587 => '𝇂', 3588 => '𝇃', 3589 => '𝇄', 3590 => '𝇅', 3591 => '𝇆', 3592 => '𝇇', 3593 => '𝇈', 3594 => '𝇉', 3595 => '𝇊', 3596 => '𝇋', 3597 => '𝇌', 3598 => '𝇍', 3599 => '𝇎', 3600 => '𝇏', 3601 => '𝇐', 3602 => '𝇑', 3603 => '𝇒', 3604 => '𝇓', 3605 => '𝇔', 3606 => '𝇕', 3607 => '𝇖', 3608 => '𝇗', 3609 => '𝇘', 3610 => '𝇙', 3611 => '𝇚', 3612 => '𝇛', 3613 => '𝇜', 3614 => '𝇝', 3615 => '𝈀', 3616 => '𝈁', 3617 => '𝈂', 3618 => '𝈃', 3619 => '𝈄', 3620 => '𝈅', 3621 => '𝈆', 3622 => '𝈇', 3623 => '𝈈', 3624 => '𝈉', 3625 => '𝈊', 3626 => '𝈋', 3627 => '𝈌', 3628 => '𝈍', 3629 => '𝈎', 3630 => '𝈏', 3631 => '𝈐', 3632 => '𝈑', 3633 => '𝈒', 3634 => '𝈓', 3635 => '𝈔', 3636 => '𝈕', 3637 => '𝈖', 3638 
=> '𝈗', 3639 => '𝈘', 3640 => '𝈙', 3641 => '𝈚', 3642 => '𝈛', 3643 => '𝈜', 3644 => '𝈝', 3645 => '𝈞', 3646 => '𝈟', 3647 => '𝈠', 3648 => '𝈡', 3649 => '𝈢', 3650 => '𝈣', 3651 => '𝈤', 3652 => '𝈥', 3653 => '𝈦', 3654 => '𝈧', 3655 => '𝈨', 3656 => '𝈩', 3657 => '𝈪', 3658 => '𝈫', 3659 => '𝈬', 3660 => '𝈭', 3661 => '𝈮', 3662 => '𝈯', 3663 => '𝈰', 3664 => '𝈱', 3665 => '𝈲', 3666 => '𝈳', 3667 => '𝈴', 3668 => '𝈵', 3669 => '𝈶', 3670 => '𝈷', 3671 => '𝈸', 3672 => '𝈹', 3673 => '𝈺', 3674 => '𝈻', 3675 => '𝈼', 3676 => '𝈽', 3677 => '𝈾', 3678 => '𝈿', 3679 => '𝉀', 3680 => '𝉁', 3681 => '𝉅', 3682 => '🀀', 3683 => '🀁', 3684 => '🀂', 3685 => '🀃', 3686 => '🀄', 3687 => '🀅', 3688 => '🀆', 3689 => '🀇', 3690 => '🀈', 3691 => '🀉', 3692 => '🀊', 3693 => '🀋', 3694 => '🀌', 3695 => '🀍', 3696 => '🀎', 3697 => '🀏', 3698 => '🀐', 3699 => '🀑', 3700 => '🀒', 3701 => '🀓', 3702 => '🀔', 3703 => '🀕', 3704 => '🀖', 3705 => '🀗', 3706 => '🀘', 3707 => '🀙', 3708 => '🀚', 3709 => '🀛', 3710 => '🀜', 3711 => '🀝', 3712 => '🀞', 3713 => '🀟', 3714 => '🀠', 3715 => '🀡', 3716 => '🀢', 3717 => '🀣', 3718 => '🀤', 3719 => '🀥', 3720 => '🀦', 3721 => '🀧', 3722 => '🀨', 3723 => '🀩', 3724 => '🀪', 3725 => '🀫', 3726 => '🀰', 3727 => '🀱', 3728 => '🀲', 3729 => '🀳', 3730 => '🀴', 3731 => '🀵', 3732 => '🀶', 3733 => '🀷', 3734 => '🀸', 3735 => '🀹', 3736 => '🀺', 3737 => '🀻', 3738 => '🀼', 3739 => '🀽', 3740 => '🀾', 3741 => '🀿', 3742 => '🁀', 3743 => '🁁', 3744 => '🁂', 3745 => '🁃', 3746 => '🁄', 3747 => '🁅', 3748 => '🁆', 3749 => '🁇', 3750 => '🁈', 3751 => '🁉', 3752 => '🁊', 3753 => '🁋', 3754 => '🁌', 3755 => '🁍', 3756 => '🁎', 3757 => '🁏', 3758 => '🁐', 3759 => '🁑', 3760 => '🁒', 3761 => '🁓', 3762 => '🁔', 3763 => '🁕', 3764 => '🁖', 3765 => '🁗', 3766 => '🁘', 3767 => '🁙', 3768 => '🁚', 3769 => '🁛', 3770 => '🁜', 3771 => '🁝', 3772 => '🁞', 3773 => '🁟', 3774 => '🁠', 3775 => '🁡', 3776 => '🁢', 3777 => '🁣', 3778 => '🁤', 3779 => '🁥', 3780 => '🁦', 3781 => '🁧', 3782 => '🁨', 3783 => '🁩', 3784 => '🁪', 3785 => '🁫', 3786 => '🁬', 3787 => '🁭', 3788 => '🁮', 3789 => '🁯', 3790 => '🁰', 3791 => '🁱', 
3792 => '🁲', 3793 => '🁳', 3794 => '🁴', 3795 => '🁵', 3796 => '🁶', 3797 => '🁷', 3798 => '🁸', 3799 => '🁹', 3800 => '🁺', 3801 => '🁻', 3802 => '🁼', 3803 => '🁽', 3804 => '🁾', 3805 => '🁿', 3806 => '🂀', 3807 => '🂁', 3808 => '🂂', 3809 => '🂃', 3810 => '🂄', 3811 => '🂅', 3812 => '🂆', 3813 => '🂇', 3814 => '🂈', 3815 => '🂉', 3816 => '🂊', 3817 => '🂋', 3818 => '🂌', 3819 => '🂍', 3820 => '🂎', 3821 => '🂏', 3822 => '🂐', 3823 => '🂑', 3824 => '🂒', 3825 => '🂓', 3826 => '🂠', 3827 => '🂡', 3828 => '🂢', 3829 => '🂣', 3830 => '🂤', 3831 => '🂥', 3832 => '🂦', 3833 => '🂧', 3834 => '🂨', 3835 => '🂩', 3836 => '🂪', 3837 => '🂫', 3838 => '🂬', 3839 => '🂭', 3840 => '🂮', 3841 => '🂱', 3842 => '🂲', 3843 => '🂳', 3844 => '🂴', 3845 => '🂵', 3846 => '🂶', 3847 => '🂷', 3848 => '🂸', 3849 => '🂹', 3850 => '🂺', 3851 => '🂻', 3852 => '🂼', 3853 => '🂽', 3854 => '🂾', 3855 => '🃁', 3856 => '🃂', 3857 => '🃃', 3858 => '🃄', 3859 => '🃅', 3860 => '🃆', 3861 => '🃇', 3862 => '🃈', 3863 => '🃉', 3864 => '🃊', 3865 => '🃋', 3866 => '🃌', 3867 => '🃍', 3868 => '🃎', 3869 => '🃏', 3870 => '🃑', 3871 => '🃒', 3872 => '🃓', 3873 => '🃔', 3874 => '🃕', 3875 => '🃖', 3876 => '🃗', 3877 => '🃘', 3878 => '🃙', 3879 => '🃚', 3880 => '🃛', 3881 => '🃜', 3882 => '🃝', 3883 => '🃞', 3884 => '🃟', 3885 => '🌀', 3886 => '🌁', 3887 => '🌂', 3888 => '🌃', 3889 => '🌄', 3890 => '🌅', 3891 => '🌆', 3892 => '🌇', 3893 => '🌈', 3894 => '🌉', 3895 => '🌊', 3896 => '🌋', 3897 => '🌌', 3898 => '🌍', 3899 => '🌎', 3900 => '🌏', 3901 => '🌐', 3902 => '🌑', 3903 => '🌒', 3904 => '🌓', 3905 => '🌔', 3906 => '🌕', 3907 => '🌖', 3908 => '🌗', 3909 => '🌘', 3910 => '🌙', 3911 => '🌚', 3912 => '🌛', 3913 => '🌜', 3914 => '🌝', 3915 => '🌞', 3916 => '🌟', 3917 => '🌠', 3918 => '🌰', 3919 => '🌱', 3920 => '🌲', 3921 => '🌳', 3922 => '🌴', 3923 => '🌵', 3924 => '🌷', 3925 => '🌸', 3926 => '🌹', 3927 => '🌺', 3928 => '🌻', 3929 => '🌼', 3930 => '🌽', 3931 => '🌾', 3932 => '🌿', 3933 => '🍀', 3934 => '🍁', 3935 => '🍂', 3936 => '🍃', 3937 => '🍄', 3938 => '🍅', 3939 => '🍆', 3940 => '🍇', 3941 => '🍈', 3942 => '🍉', 3943 => '🍊', 3944 => '🍋', 3945 => 
'🍌', 3946 => '🍍', 3947 => '🍎', 3948 => '🍏', 3949 => '🍐', 3950 => '🍑', 3951 => '🍒', 3952 => '🍓', 3953 => '🍔', 3954 => '🍕', 3955 => '🍖', 3956 => '🍗', 3957 => '🍘', 3958 => '🍙', 3959 => '🍚', 3960 => '🍛', 3961 => '🍜', 3962 => '🍝', 3963 => '🍞', 3964 => '🍟', 3965 => '🍠', 3966 => '🍡', 3967 => '🍢', 3968 => '🍣', 3969 => '🍤', 3970 => '🍥', 3971 => '🍦', 3972 => '🍧', 3973 => '🍨', 3974 => '🍩', 3975 => '🍪', 3976 => '🍫', 3977 => '🍬', 3978 => '🍭', 3979 => '🍮', 3980 => '🍯', 3981 => '🍰', 3982 => '🍱', 3983 => '🍲', 3984 => '🍳', 3985 => '🍴', 3986 => '🍵', 3987 => '🍶', 3988 => '🍷', 3989 => '🍸', 3990 => '🍹', 3991 => '🍺', 3992 => '🍻', 3993 => '🍼', 3994 => '🎀', 3995 => '🎁', 3996 => '🎂', 3997 => '🎃', 3998 => '🎄', 3999 => '🎅', 4000 => '🎆', 4001 => '🎇', 4002 => '🎈', 4003 => '🎉', 4004 => '🎊', 4005 => '🎋', 4006 => '🎌', 4007 => '🎍', 4008 => '🎎', 4009 => '🎏', 4010 => '🎐', 4011 => '🎑', 4012 => '🎒', 4013 => '🎓', 4014 => '🎠', 4015 => '🎡', 4016 => '🎢', 4017 => '🎣', 4018 => '🎤', 4019 => '🎥', 4020 => '🎦', 4021 => '🎧', 4022 => '🎨', 4023 => '🎩', 4024 => '🎪', 4025 => '🎫', 4026 => '🎬', 4027 => '🎭', 4028 => '🎮', 4029 => '🎯', 4030 => '🎰', 4031 => '🎱', 4032 => '🎲', 4033 => '🎳', 4034 => '🎴', 4035 => '🎵', 4036 => '🎶', 4037 => '🎷', 4038 => '🎸', 4039 => '🎹', 4040 => '🎺', 4041 => '🎻', 4042 => '🎼', 4043 => '🎽', 4044 => '🎾', 4045 => '🎿', 4046 => '🏀', 4047 => '🏁', 4048 => '🏂', 4049 => '🏃', 4050 => '🏄', 4051 => '🏆', 4052 => '🏇', 4053 => '🏈', 4054 => '🏉', 4055 => '🏊', 4056 => '🏠', 4057 => '🏡', 4058 => '🏢', 4059 => '🏣', 4060 => '🏤', 4061 => '🏥', 4062 => '🏦', 4063 => '🏧', 4064 => '🏨', 4065 => '🏩', 4066 => '🏪', 4067 => '🏫', 4068 => '🏬', 4069 => '🏭', 4070 => '🏮', 4071 => '🏯', 4072 => '🏰', 4073 => '🐀', 4074 => '🐁', 4075 => '🐂', 4076 => '🐃', 4077 => '🐄', 4078 => '🐅', 4079 => '🐆', 4080 => '🐇', 4081 => '🐈', 4082 => '🐉', 4083 => '🐊', 4084 => '🐋', 4085 => '🐌', 4086 => '🐍', 4087 => '🐎', 4088 => '🐏', 4089 => '🐐', 4090 => '🐑', 4091 => '🐒', 4092 => '🐓', 4093 => '🐔', 4094 => '🐕', 4095 => '🐖', 4096 => '🐗', 4097 => '🐘', 4098 => '🐙', 4099 
=> '🐚', 4100 => '🐛', 4101 => '🐜', 4102 => '🐝', 4103 => '🐞', 4104 => '🐟', 4105 => '🐠', 4106 => '🐡', 4107 => '🐢', 4108 => '🐣', 4109 => '🐤', 4110 => '🐥', 4111 => '🐦', 4112 => '🐧', 4113 => '🐨', 4114 => '🐩', 4115 => '🐪', 4116 => '🐫', 4117 => '🐬', 4118 => '🐭', 4119 => '🐮', 4120 => '🐯', 4121 => '🐰', 4122 => '🐱', 4123 => '🐲', 4124 => '🐳', 4125 => '🐴', 4126 => '🐵', 4127 => '🐶', 4128 => '🐷', 4129 => '🐸', 4130 => '🐹', 4131 => '🐺', 4132 => '🐻', 4133 => '🐼', 4134 => '🐽', 4135 => '🐾', 4136 => '👀', 4137 => '👂', 4138 => '👃', 4139 => '👄', 4140 => '👅', 4141 => '👆', 4142 => '👇', 4143 => '👈', 4144 => '👉', 4145 => '👊', 4146 => '👋', 4147 => '👌', 4148 => '👍', 4149 => '👎', 4150 => '👏', 4151 => '👐', 4152 => '👑', 4153 => '👒', 4154 => '👓', 4155 => '👔', 4156 => '👕', 4157 => '👖', 4158 => '👗', 4159 => '👘', 4160 => '👙', 4161 => '👚', 4162 => '👛', 4163 => '👜', 4164 => '👝', 4165 => '👞', 4166 => '👟', 4167 => '👠', 4168 => '👡', 4169 => '👢', 4170 => '👣', 4171 => '👤', 4172 => '👥', 4173 => '👦', 4174 => '👧', 4175 => '👨', 4176 => '👩', 4177 => '👪', 4178 => '👫', 4179 => '👬', 4180 => '👭', 4181 => '👮', 4182 => '👯', 4183 => '👰', 4184 => '👱', 4185 => '👲', 4186 => '👳', 4187 => '👴', 4188 => '👵', 4189 => '👶', 4190 => '👷', 4191 => '👸', 4192 => '👹', 4193 => '👺', 4194 => '👻', 4195 => '👼', 4196 => '👽', 4197 => '👾', 4198 => '👿', 4199 => '💀', 4200 => '💁', 4201 => '💂', 4202 => '💃', 4203 => '💄', 4204 => '💅', 4205 => '💆', 4206 => '💇', 4207 => '💈', 4208 => '💉', 4209 => '💊', 4210 => '💋', 4211 => '💌', 4212 => '💍', 4213 => '💎', 4214 => '💏', 4215 => '💐', 4216 => '💑', 4217 => '💒', 4218 => '💓', 4219 => '💔', 4220 => '💕', 4221 => '💖', 4222 => '💗', 4223 => '💘', 4224 => '💙', 4225 => '💚', 4226 => '💛', 4227 => '💜', 4228 => '💝', 4229 => '💞', 4230 => '💟', 4231 => '💠', 4232 => '💡', 4233 => '💢', 4234 => '💣', 4235 => '💤', 4236 => '💥', 4237 => '💦', 4238 => '💧', 4239 => '💨', 4240 => '💩', 4241 => '💪', 4242 => '💫', 4243 => '💬', 4244 => '💭', 4245 => '💮', 4246 => '💯', 4247 => '💰', 4248 => '💱', 4249 => '💲', 4250 => '💳', 4251 => '💴', 4252 => '💵', 
4253 => '💶', 4254 => '💷', 4255 => '💸', 4256 => '💹', 4257 => '💺', 4258 => '💻', 4259 => '💼', 4260 => '💽', 4261 => '💾', 4262 => '💿', 4263 => '📀', 4264 => '📁', 4265 => '📂', 4266 => '📃', 4267 => '📄', 4268 => '📅', 4269 => '📆', 4270 => '📇', 4271 => '📈', 4272 => '📉', 4273 => '📊', 4274 => '📋', 4275 => '📌', 4276 => '📍', 4277 => '📎', 4278 => '📏', 4279 => '📐', 4280 => '📑', 4281 => '📒', 4282 => '📓', 4283 => '📔', 4284 => '📕', 4285 => '📖', 4286 => '📗', 4287 => '📘', 4288 => '📙', 4289 => '📚', 4290 => '📛', 4291 => '📜', 4292 => '📝', 4293 => '📞', 4294 => '📟', 4295 => '📠', 4296 => '📡', 4297 => '📢', 4298 => '📣', 4299 => '📤', 4300 => '📥', 4301 => '📦', 4302 => '📧', 4303 => '📨', 4304 => '📩', 4305 => '📪', 4306 => '📫', 4307 => '📬', 4308 => '📭', 4309 => '📮', 4310 => '📯', 4311 => '📰', 4312 => '📱', 4313 => '📲', 4314 => '📳', 4315 => '📴', 4316 => '📵', 4317 => '📶', 4318 => '📷', 4319 => '📹', 4320 => '📺', 4321 => '📻', 4322 => '📼', 4323 => '🔀', 4324 => '🔁', 4325 => '🔂', 4326 => '🔃', 4327 => '🔄', 4328 => '🔅', 4329 => '🔆', 4330 => '🔇', 4331 => '🔈', 4332 => '🔉', 4333 => '🔊', 4334 => '🔋', 4335 => '🔌', 4336 => '🔍', 4337 => '🔎', 4338 => '🔏', 4339 => '🔐', 4340 => '🔑', 4341 => '🔒', 4342 => '🔓', 4343 => '🔔', 4344 => '🔕', 4345 => '🔖', 4346 => '🔗', 4347 => '🔘', 4348 => '🔙', 4349 => '🔚', 4350 => '🔛', 4351 => '🔜', 4352 => '🔝', 4353 => '🔞', 4354 => '🔟', 4355 => '🔠', 4356 => '🔡', 4357 => '🔢', 4358 => '🔣', 4359 => '🔤', 4360 => '🔥', 4361 => '🔦', 4362 => '🔧', 4363 => '🔨', 4364 => '🔩', 4365 => '🔪', 4366 => '🔫', 4367 => '🔬', 4368 => '🔭', 4369 => '🔮', 4370 => '🔯', 4371 => '🔰', 4372 => '🔱', 4373 => '🔲', 4374 => '🔳', 4375 => '🔴', 4376 => '🔵', 4377 => '🔶', 4378 => '🔷', 4379 => '🔸', 4380 => '🔹', 4381 => '🔺', 4382 => '🔻', 4383 => '🔼', 4384 => '🔽', 4385 => '🕐', 4386 => '🕑', 4387 => '🕒', 4388 => '🕓', 4389 => '🕔', 4390 => '🕕', 4391 => '🕖', 4392 => '🕗', 4393 => '🕘', 4394 => '🕙', 4395 => '🕚', 4396 => '🕛', 4397 => '🕜', 4398 => '🕝', 4399 => '🕞', 4400 => '🕟', 4401 => '🕠', 4402 => '🕡', 4403 => '🕢', 4404 => '🕣', 4405 => '🕤', 4406 => 
'🕥', 4407 => '🕦', 4408 => '🕧', 4409 => '🗻', 4410 => '🗼', 4411 => '🗽', 4412 => '🗾', 4413 => '🗿', 4414 => '😁', 4415 => '😂', 4416 => '😃', 4417 => '😄', 4418 => '😅', 4419 => '😆', 4420 => '😇', 4421 => '😈', 4422 => '😉', 4423 => '😊', 4424 => '😋', 4425 => '😌', 4426 => '😍', 4427 => '😎', 4428 => '😏', 4429 => '😐', 4430 => '😒', 4431 => '😓', 4432 => '😔', 4433 => '😖', 4434 => '😘', 4435 => '😚', 4436 => '😜', 4437 => '😝', 4438 => '😞', 4439 => '😠', 4440 => '😡', 4441 => '😢', 4442 => '😣', 4443 => '😤', 4444 => '😥', 4445 => '😨', 4446 => '😩', 4447 => '😪', 4448 => '😫', 4449 => '😭', 4450 => '😰', 4451 => '😱', 4452 => '😲', 4453 => '😳', 4454 => '😵', 4455 => '😶', 4456 => '😷', 4457 => '😸', 4458 => '😹', 4459 => '😺', 4460 => '😻', 4461 => '😼', 4462 => '😽', 4463 => '😾', 4464 => '😿', 4465 => '🙀', 4466 => '🙅', 4467 => '🙆', 4468 => '🙇', 4469 => '🙈', 4470 => '🙉', 4471 => '🙊', 4472 => '🙋', 4473 => '🙌', 4474 => '🙍', 4475 => '🙎', 4476 => '🙏', 4477 => '🚀', 4478 => '🚁', 4479 => '🚂', 4480 => '🚃', 4481 => '🚄', 4482 => '🚅', 4483 => '🚆', 4484 => '🚇', 4485 => '🚈', 4486 => '🚉', 4487 => '🚊', 4488 => '🚋', 4489 => '🚌', 4490 => '🚍', 4491 => '🚎', 4492 => '🚏', 4493 => '🚐', 4494 => '🚑', 4495 => '🚒', 4496 => '🚓', 4497 => '🚔', 4498 => '🚕', 4499 => '🚖', 4500 => '🚗', 4501 => '🚘', 4502 => '🚙', 4503 => '🚚', 4504 => '🚛', 4505 => '🚜', 4506 => '🚝', 4507 => '🚞', 4508 => '🚟', 4509 => '🚠', 4510 => '🚡', 4511 => '🚢', 4512 => '🚣', 4513 => '🚤', 4514 => '🚥', 4515 => '🚦', 4516 => '🚧', 4517 => '🚨', 4518 => '🚩', 4519 => '🚪', 4520 => '🚫', 4521 => '🚬', 4522 => '🚭', 4523 => '🚮', 4524 => '🚯', 4525 => '🚰', 4526 => '🚱', 4527 => '🚲', 4528 => '🚳', 4529 => '🚴', 4530 => '🚵', 4531 => '🚶', 4532 => '🚷', 4533 => '🚸', 4534 => '🚹', 4535 => '🚺', 4536 => '🚻', 4537 => '🚼', 4538 => '🚽', 4539 => '🚾', 4540 => '🚿', 4541 => '🛀', 4542 => '🛁', 4543 => '🛂', 4544 => '🛃', 4545 => '🛄', 4546 => '🛅', 4547 => '🜀', 4548 => '🜁', 4549 => '🜂', 4550 => '🜃', 4551 => '🜄', 4552 => '🜅', 4553 => '🜆', 4554 => '🜇', 4555 => '🜈', 4556 => '🜉', 4557 => '🜊', 4558 => '🜋', 4559 => '🜌', 4560 
=> '🜍', 4561 => '🜎', 4562 => '🜏', 4563 => '🜐', 4564 => '🜑', 4565 => '🜒', 4566 => '🜓', 4567 => '🜔', 4568 => '🜕', 4569 => '🜖', 4570 => '🜗', 4571 => '🜘', 4572 => '🜙', 4573 => '🜚', 4574 => '🜛', 4575 => '🜜', 4576 => '🜝', 4577 => '🜞', 4578 => '🜟', 4579 => '🜠', 4580 => '🜡', 4581 => '🜢', 4582 => '🜣', 4583 => '🜤', 4584 => '🜥', 4585 => '🜦', 4586 => '🜧', 4587 => '🜨', 4588 => '🜩', 4589 => '🜪', 4590 => '🜫', 4591 => '🜬', 4592 => '🜭', 4593 => '🜮', 4594 => '🜯', 4595 => '🜰', 4596 => '🜱', 4597 => '🜲', 4598 => '🜳', 4599 => '🜴', 4600 => '🜵', 4601 => '🜶', 4602 => '🜷', 4603 => '🜸', 4604 => '🜹', 4605 => '🜺', 4606 => '🜻', 4607 => '🜼', 4608 => '🜽', 4609 => '🜾', 4610 => '🜿', 4611 => '🝀', 4612 => '🝁', 4613 => '🝂', 4614 => '🝃', 4615 => '🝄', 4616 => '🝅', 4617 => '🝆', 4618 => '🝇', 4619 => '🝈', 4620 => '🝉', 4621 => '🝊', 4622 => '🝋', 4623 => '🝌', 4624 => '🝍', 4625 => '🝎', 4626 => '🝏', 4627 => '🝐', 4628 => '🝑', 4629 => '🝒', 4630 => '🝓', 4631 => '🝔', 4632 => '🝕', 4633 => '🝖', 4634 => '🝗', 4635 => '🝘', 4636 => '🝙', 4637 => '🝚', 4638 => '🝛', 4639 => '🝜', 4640 => '🝝', 4641 => '🝞', 4642 => '🝟', 4643 => '🝠', 4644 => '🝡', 4645 => '🝢', 4646 => '🝣', 4647 => '🝤', 4648 => '🝥', 4649 => '🝦', 4650 => '🝧', 4651 => '🝨', 4652 => '🝩', 4653 => '🝪', 4654 => '🝫', 4655 => '🝬', 4656 => '🝭', 4657 => '🝮', 4658 => '🝯', 4659 => '🝰', 4660 => '🝱', 4661 => '🝲', 4662 => '🝳', 4663 => '㆐', 4664 => '㆑', 4665 => '', 4666 => '�', 4667 => '৴', 4668 => '৵', 4669 => '৶', 4670 => '৷', 4671 => '৸', 4672 => '৹', 4673 => '୲', 4674 => '୳', 4675 => '୴', 4676 => '୵', 4677 => '୶', 4678 => '୷', 4679 => '꠰', 4680 => '꠱', 4681 => '꠲', 4682 => '꠳', 4683 => '꠴', 4684 => '꠵', 4685 => '௰', 4686 => '௱', 4687 => '௲', 4688 => '൰', 4689 => '൱', 4690 => '൲', 4691 => '൳', 4692 => '൴', 4693 => '൵', 4694 => '፲', 4695 => '፳', 4696 => '፴', 4697 => '፵', 4698 => '፶', 4699 => '፷', 4700 => '፸', 4701 => '፹', 4702 => '፺', 4703 => '፻', 4704 => '፼', 4705 => 'ↀ', 4706 => 'ↁ', 4707 => 'ↂ', 4708 => 'ↆ', 4709 => 'ↇ', 4710 => 'ↈ', 4711 => '𐹩', 4712 => '𐹪', 4713 => '𐹫', 
4714 => '𐹬', 4715 => '𐹭', 4716 => '𐹮', 4717 => '𐹯', 4718 => '𐹰', 4719 => '𐹱', 4720 => '𐹲', 4721 => '𐹳', 4722 => '𐹴', 4723 => '𐹵', 4724 => '𐹶', 4725 => '𐹷', 4726 => '𐹸', 4727 => '𐹹', 4728 => '𐹺', 4729 => '𐹻', 4730 => '𐹼', 4731 => '𐹽', 4732 => '𐹾', 4733 => '⳽', 4734 => '𐌢', 4735 => '𐌣', 4736 => '𐄐', 4737 => '𐄑', 4738 => '𐄒', 4739 => '𐄓', 4740 => '𐄔', 4741 => '𐄕', 4742 => '𐄖', 4743 => '𐄗', 4744 => '𐄘', 4745 => '𐄙', 4746 => '𐄚', 4747 => '𐄛', 4748 => '𐄜', 4749 => '𐄝', 4750 => '𐄞', 4751 => '𐄟', 4752 => '𐄠', 4753 => '𐄡', 4754 => '𐄢', 4755 => '𐄣', 4756 => '𐄤', 4757 => '𐄥', 4758 => '𐄦', 4759 => '𐄧', 4760 => '𐄨', 4761 => '𐄩', 4762 => '𐄪', 4763 => '𐄫', 4764 => '𐄬', 4765 => '𐄭', 4766 => '𐄮', 4767 => '𐄯', 4768 => '𐄰', 4769 => '𐄱', 4770 => '𐄲', 4771 => '𐄳', 4772 => '𐅀', 4773 => '𐅁', 4774 => '𐅄', 4775 => '𐅅', 4776 => '𐅆', 4777 => '𐅇', 4778 => '𐅉', 4779 => '𐅊', 4780 => '𐅋', 4781 => '𐅌', 4782 => '𐅍', 4783 => '𐅎', 4784 => '𐅐', 4785 => '𐅑', 4786 => '𐅒', 4787 => '𐅓', 4788 => '𐅔', 4789 => '𐅕', 4790 => '𐅖', 4791 => '𐅗', 4792 => '𐅠', 4793 => '𐅡', 4794 => '𐅢', 4795 => '𐅣', 4796 => '𐅤', 4797 => '𐅥', 4798 => '𐅦', 4799 => '𐅧', 4800 => '𐅨', 4801 => '𐅩', 4802 => '𐅪', 4803 => '𐅫', 4804 => '𐅬', 4805 => '𐅭', 4806 => '𐅮', 4807 => '𐅯', 4808 => '𐅰', 4809 => '𐅱', 4810 => '𐅲', 4811 => '𐅴', 4812 => '𐅵', 4813 => '𐅶', 4814 => '𐅷', 4815 => '𐅸', 4816 => '𐏓', 4817 => '𐏔', 4818 => '𐏕', 4819 => '𐩾', 4820 => '𐩿', 4821 => '𐤗', 4822 => '𐤘', 4823 => '𐤙', 4824 => '𐡛', 4825 => '𐡜', 4826 => '𐡝', 4827 => '𐡞', 4828 => '𐡟', 4829 => '𐭜', 4830 => '𐭝', 4831 => '𐭞', 4832 => '𐭟', 4833 => '𐭼', 4834 => '𐭽', 4835 => '𐭾', 4836 => '𐭿', 4837 => '𑁛', 4838 => '𑁜', 4839 => '𑁝', 4840 => '𑁞', 4841 => '𑁟', 4842 => '𑁠', 4843 => '𑁡', 4844 => '𑁢', 4845 => '𑁣', 4846 => '𑁤', 4847 => '𑁥', 4848 => '𐩄', 4849 => '𐩅', 4850 => '𐩆', 4851 => '𐩇', 4852 => '𒐲', 4853 => '𒐳', 4854 => '𒑖', 4855 => '𒑗', 4856 => '𒑚', 4857 => '𒑛', 4858 => '𒑜', 4859 => '𒑝', 4860 => '𒑞', 4861 => '𒑟', 4862 => '𒑠', 4863 => '𒑡', 4864 => '𒑢', 4865 => '𝍩', 4866 => '𝍪', 4867 => 
'𝍫', 4868 => '𝍬', 4869 => '𝍭', 4870 => '𝍮', 4871 => '𝍯', 4872 => '𝍰', 4873 => '𝍱', 4874 => 'ː', 4875 => 'ˑ', 4876 => 'ॱ', 4877 => 'ๆ', 4878 => 'ໆ', 4879 => 'ᪧ', 4880 => 'ꧏ', 4881 => 'ꩰ', 4882 => 'ꫝ', 4883 => 'ゝ', 4884 => 'ー', 4885 => 'ヽ', 4886 => '¤', 4887 => '¢', 4888 => '$', 4889 => '£', 4890 => '¥', 4891 => '؋', 4892 => '৲', 4893 => '৳', 4894 => '৻', 4895 => '૱', 4896 => '꠸', 4897 => '௹', 4898 => '฿', 4899 => '៛', 4900 => '₠', 4901 => '₡', 4902 => '₢', 4903 => '₣', 4904 => '₤', 4905 => '₥', 4906 => '₦', 4907 => '₧', 4908 => '₩', 4909 => '₪', 4910 => '₫', 4911 => '€', 4912 => '₭', 4913 => '₮', 4914 => '₯', 4915 => '₰', 4916 => '₱', 4917 => '₲', 4918 => '₳', 4919 => '₴', 4920 => '₵', 4921 => '₶', 4922 => '₷', 4923 => '₸', 4924 => '₹', 4925 => '0', 4926 => '1', 4927 => '2', 4928 => '3', 4929 => '4', 4930 => '5', 4931 => '6', 4932 => '7', 4933 => '8', 4934 => '9', 4935 => 'A', 4936 => 'ᴀ', 4937 => 'Ⱥ', 4938 => 'ᶏ', 4939 => 'ᴁ', 4940 => 'ᴂ', 4941 => 'Ɐ', 4942 => 'Ɑ', 4943 => 'ᶐ', 4944 => 'Ɒ', 4945 => 'B', 4946 => 'ʙ', 4947 => 'Ƀ', 4948 => 'ᴯ', 4949 => 'ᴃ', 4950 => 'ᵬ', 4951 => 'ᶀ', 4952 => 'Ɓ', 4953 => 'Ƃ', 4954 => 'C', 4955 => 'ᴄ', 4956 => 'Ȼ', 4957 => 'Ƈ', 4958 => 'ɕ', 4959 => 'Ↄ', 4960 => 'Ꜿ', 4961 => 'D', 4962 => 'ᴅ', 4963 => 'ᴆ', 4964 => 'ᵭ', 4965 => 'ᶁ', 4966 => 'Ɖ', 4967 => 'Ɗ', 4968 => 'ᶑ', 4969 => 'Ƌ', 4970 => 'ȡ', 4971 => 'ꝱ', 4972 => 'ẟ', 4973 => 'E', 4974 => 'ᴇ', 4975 => 'Ɇ', 4976 => 'ᶒ', 4977 => 'ⱸ', 4978 => 'Ǝ', 4979 => 'ⱻ', 4980 => 'Ə', 4981 => 'ᶕ', 4982 => 'Ɛ', 4983 => 'ᶓ', 4984 => 'ɘ', 4985 => 'ɚ', 4986 => 'ɜ', 4987 => 'ᶔ', 4988 => 'ᴈ', 4989 => 'ɝ', 4990 => 'ɞ', 4991 => 'ʚ', 4992 => 'ɤ', 4993 => 'F', 4994 => 'ꜰ', 4995 => 'ᵮ', 4996 => 'ᶂ', 4997 => 'Ƒ', 4998 => 'Ⅎ', 4999 => 'ꟻ', 5000 => 'G', 5001 => 'ɡ', 5002 => 'ɢ', 5003 => 'Ǥ', 5004 => 'ᶃ', 5005 => 'Ɠ', 5006 => 'ʛ', 5007 => 'ᵷ', 5008 => 'Ꝿ', 5009 => 'Ɣ', 5010 => 'Ƣ', 5011 => 'H', 5012 => 'ʜ', 5013 => 'Ƕ', 5014 => 'ɦ', 5015 => 'Ⱨ', 5016 => 'Ⱶ', 5017 => 'Ꜧ', 5018 => 'ɧ', 5019 => 'ʻ', 5020 => 'ʽ', 5021 
=> 'I', 5022 => 'ı', 5023 => 'ɪ', 5024 => 'ꟾ', 5025 => 'ᴉ', 5026 => 'Ɨ', 5027 => 'ᵻ', 5028 => 'ᶖ', 5029 => 'Ɩ', 5030 => 'ᵼ', 5031 => 'J', 5032 => 'ȷ', 5033 => 'ᴊ', 5034 => 'Ɉ', 5035 => 'ʝ', 5036 => 'ɟ', 5037 => 'ʄ', 5038 => 'K', 5039 => 'ᴋ', 5040 => 'ᶄ', 5041 => 'Ƙ', 5042 => 'Ⱪ', 5043 => 'Ꝁ', 5044 => 'Ꝃ', 5045 => 'Ꝅ', 5046 => 'ʞ', 5047 => 'L', 5048 => 'ʟ', 5049 => 'Ꝇ', 5050 => 'ᴌ', 5051 => 'Ꝉ', 5052 => 'Ƚ', 5053 => 'Ⱡ', 5054 => 'Ɫ', 5055 => 'ɬ', 5056 => 'ᶅ', 5057 => 'ɭ', 5058 => 'ꞎ', 5059 => 'ȴ', 5060 => 'ꝲ', 5061 => 'ɮ', 5062 => 'Ꞁ', 5063 => 'ƛ', 5064 => 'ʎ', 5065 => 'M', 5066 => 'ᴍ', 5067 => 'ᵯ', 5068 => 'ᶆ', 5069 => 'Ɱ', 5070 => 'ꟽ', 5071 => 'ꟿ', 5072 => 'ꝳ', 5073 => 'N', 5074 => 'ɴ', 5075 => 'ᴻ', 5076 => 'ᴎ', 5077 => 'ᵰ', 5078 => 'Ɲ', 5079 => 'Ƞ', 5080 => 'Ꞑ', 5081 => 'ᶇ', 5082 => 'ɳ', 5083 => 'ȵ', 5084 => 'ꝴ', 5085 => 'Ŋ', 5086 => 'O', 5087 => 'ᴏ', 5088 => 'ᴑ', 5089 => 'ɶ', 5090 => 'ᴔ', 5091 => 'ᴓ', 5092 => 'Ɔ', 5093 => 'ᴐ', 5094 => 'ᴒ', 5095 => 'ᶗ', 5096 => 'Ꝍ', 5097 => 'ᴖ', 5098 => 'ᴗ', 5099 => 'ⱺ', 5100 => 'Ɵ', 5101 => 'Ꝋ', 5102 => 'ɷ', 5103 => 'Ȣ', 5104 => 'ᴕ', 5105 => 'P', 5106 => 'ᴘ', 5107 => 'Ᵽ', 5108 => 'Ꝑ', 5109 => 'ᵱ', 5110 => 'ᶈ', 5111 => 'Ƥ', 5112 => 'Ꝓ', 5113 => 'Ꝕ', 5114 => 'ꟼ', 5115 => 'ɸ', 5116 => 'ⱷ', 5117 => 'Q', 5118 => 'Ꝗ', 5119 => 'Ꝙ', 5120 => 'ʠ', 5121 => 'Ɋ', 5122 => 'ĸ', 5123 => 'R', 5124 => 'Ʀ', 5125 => 'Ꝛ', 5126 => 'ᴙ', 5127 => 'Ɍ', 5128 => 'ᵲ', 5129 => 'ɹ', 5130 => 'ᴚ', 5131 => 'ɺ', 5132 => 'ᶉ', 5133 => 'ɻ', 5134 => 'ⱹ', 5135 => 'ɼ', 5136 => 'Ɽ', 5137 => 'ɾ', 5138 => 'ᵳ', 5139 => 'ɿ', 5140 => 'ʁ', 5141 => 'ꝵ', 5142 => 'ꝶ', 5143 => 'Ꝝ', 5144 => 'S', 5145 => 'ꜱ', 5146 => 'ᵴ', 5147 => 'ᶊ', 5148 => 'ʂ', 5149 => 'Ȿ', 5150 => 'ẜ', 5151 => 'ẝ', 5152 => 'Ʃ', 5153 => 'ᶋ', 5154 => 'ƪ', 5155 => 'ʅ', 5156 => 'ᶘ', 5157 => 'ʆ', 5158 => 'T', 5159 => 'ᴛ', 5160 => 'Ŧ', 5161 => 'Ⱦ', 5162 => 'ᵵ', 5163 => 'ƫ', 5164 => 'Ƭ', 5165