MediaWiki master
Parser.php
Go to the documentation of this file.
1<?php
24namespace MediaWiki\Parser;
25
26use BadMethodCallException;
30use CoreTagHooks;
31use Exception;
32use File;
33use HtmlArmor;
37use InvalidArgumentException;
38use Language;
39use LanguageCode;
41use LogicException;
42use MapCacheLRU;
43use MediaHandler;
49use MediaWiki\Debug\DeprecationHelper;
91use PPFrame;
92use PPNode;
93use Preprocessor;
95use Psr\Log\LoggerInterface;
96use RuntimeException;
98use StringUtils;
99use StripState;
100use UnexpectedValueException;
102use Wikimedia\Bcp47Code\Bcp47CodeValue;
103use Wikimedia\IPUtils;
104use Wikimedia\Parsoid\Core\SectionMetadata;
105use Wikimedia\Parsoid\Core\TOCData;
106use Wikimedia\Parsoid\DOM\Comment;
107use Wikimedia\Parsoid\DOM\DocumentFragment;
108use Wikimedia\Parsoid\DOM\Element;
109use Wikimedia\Parsoid\DOM\Node;
110use Wikimedia\Parsoid\Utils\DOMCompat;
111use Wikimedia\Parsoid\Utils\DOMUtils;
112use Wikimedia\ScopedCallback;
113
154#[\AllowDynamicProperties]
155class Parser {
156 use DeprecationHelper;
157
158 # Flags for Parser::setFunctionHook
159 public const SFH_NO_HASH = 1;
160 public const SFH_OBJECT_ARGS = 2;
161
162 # Constants needed for external link processing
170 public const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
175 // phpcs:ignore Generic.Files.LineLength
176 private const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
178 // phpcs:ignore Generic.Files.LineLength
179 private const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
180 \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)avif|gif|jpg|jpeg|png|svg|webp)$/Sxu';
181
183 private const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
184
189 public const PTD_FOR_INCLUSION = Preprocessor::DOM_FOR_INCLUSION;
190
191 # Allowed values for $this->mOutputType
193 public const OT_HTML = 1;
195 public const OT_WIKI = 2;
197 public const OT_PREPROCESS = 3;
202 public const OT_PLAIN = 4;
203
221 public const MARKER_SUFFIX = "-QINU`\"'\x7f";
222 public const MARKER_PREFIX = "\x7f'\"`UNIQ-";
223
238 public const TOC_PLACEHOLDER = '<meta property="mw:PageProp/toc" />';
239
247 private const TOC_PLACEHOLDER_REGEX = '/<meta\\b[^>]*\\bproperty\\s*=\\s*"mw:PageProp\\/toc"[^>]*>/';
248
249 # Persistent:
251 private array $mTagHooks = [];
253 private array $mFunctionHooks = [];
255 private array $mFunctionSynonyms = [ 0 => [], 1 => [] ];
257 private array $mStripList = [];
259 private array $mVarCache = [];
261 private array $mImageParams = [];
263 private array $mImageParamsMagicArray = [];
265 public $mMarkerIndex = 0;
266
267 // Initialised by initializeVariables()
269 private MagicWordArray $mVariables;
270 private MagicWordArray $mSubstWords;
271
272 // Initialised in constructor
274 private string $mExtLinkBracketedRegex;
275 private UrlUtils $urlUtils;
276 private Preprocessor $mPreprocessor;
277
278 // Cleared with clearState():
280 private ParserOutput $mOutput;
281 private int $mAutonumber = 0;
282 private StripState $mStripState;
283 private LinkHolderArray $mLinkHolders;
284 private int $mLinkID = 0;
285 private array $mIncludeSizes;
290 private array $mTplRedirCache;
292 public array $mHeadings;
294 private array $mDoubleUnderscores;
300 private bool $mShowToc;
301 private bool $mForceTocPosition;
302 private array $mTplDomCache;
303 private ?UserIdentity $mUser;
304
305 # Temporary
306 # These are variables reset at least once per parse regardless of $clearState
307
312 private $mOptions;
313
314 # Deprecated "dynamic" properties
315 # These used to be dynamic properties added to the parser, but these
316 # have been deprecated since 1.42.
320 public $extCite;
337
343 private Title $mTitle;
345 private int $mOutputType;
350 private array $ot;
352 private ?int $mRevisionId = null;
354 private ?string $mRevisionTimestamp = null;
356 private ?string $mRevisionUser = null;
358 private ?int $mRevisionSize = null;
360 private $mInputSize = false;
361
362 private ?RevisionRecord $mRevisionRecordObject = null;
363
369 private array $mLangLinkLanguages;
370
376 private ?MapCacheLRU $currentRevisionCache = null;
377
382 private $mInParse = false;
383
384 private SectionProfiler $mProfiler;
385 private ?LinkRenderer $mLinkRenderer = null;
386
387 private MagicWordFactory $magicWordFactory;
388 private Language $contLang;
389 private LanguageConverterFactory $languageConverterFactory;
390 private LanguageNameUtils $languageNameUtils;
391 private ParserFactory $factory;
392 private SpecialPageFactory $specialPageFactory;
393 private TitleFormatter $titleFormatter;
399 private ServiceOptions $svcOptions;
400 private LinkRendererFactory $linkRendererFactory;
401 private NamespaceInfo $nsInfo;
402 private LoggerInterface $logger;
403 private BadFileLookup $badFileLookup;
404 private HookContainer $hookContainer;
405 private HookRunner $hookRunner;
406 private TidyDriverBase $tidy;
407 private WANObjectCache $wanCache;
408 private UserOptionsLookup $userOptionsLookup;
409 private UserFactory $userFactory;
410 private HttpRequestFactory $httpRequestFactory;
411 private TrackingCategories $trackingCategories;
412 private SignatureValidatorFactory $signatureValidatorFactory;
413 private UserNameUtils $userNameUtils;
414
418 public const CONSTRUCTOR_OPTIONS = [
419 // See documentation for the corresponding config options
420 // Many of these are only used in (eg) CoreMagicVariables
443 ];
444
/**
 * Store the injected services, build the bracketed external-link regex and
 * the preprocessor, and register the core parser functions, core tag hooks
 * and variables.
 *
 * Parsers must not be constructed directly; use a ParserFactory.
 *
 * @param ServiceOptions $svcOptions Must satisfy self::CONSTRUCTOR_OPTIONS
 * @param MagicWordFactory $magicWordFactory
 * @param Language $contLang Content language
 * @param ParserFactory $factory
 * @param UrlUtils $urlUtils Source of the valid protocol list used in link regexes
 * @param SpecialPageFactory $spFactory
 * @param LinkRendererFactory $linkRendererFactory
 * @param NamespaceInfo $nsInfo
 * @param LoggerInterface $logger
 * @param BadFileLookup $badFileLookup
 * @param LanguageConverterFactory $languageConverterFactory
 * @param LanguageNameUtils $languageNameUtils
 * @param HookContainer $hookContainer Also wrapped in a HookRunner
 * @param TidyDriverBase $tidy
 * @param WANObjectCache $wanCache Passed on to the preprocessor
 * @param UserOptionsLookup $userOptionsLookup
 * @param UserFactory $userFactory
 * @param TitleFormatter $titleFormatter
 * @param HttpRequestFactory $httpRequestFactory
 * @param TrackingCategories $trackingCategories
 * @param SignatureValidatorFactory $signatureValidatorFactory
 * @param UserNameUtils $userNameUtils
 * @throws BadMethodCallException When constructed outside of a ParserFactory
 */
public function __construct(
	ServiceOptions $svcOptions,
	MagicWordFactory $magicWordFactory,
	Language $contLang,
	ParserFactory $factory,
	UrlUtils $urlUtils,
	SpecialPageFactory $spFactory,
	LinkRendererFactory $linkRendererFactory,
	NamespaceInfo $nsInfo,
	LoggerInterface $logger,
	BadFileLookup $badFileLookup,
	LanguageConverterFactory $languageConverterFactory,
	LanguageNameUtils $languageNameUtils,
	HookContainer $hookContainer,
	TidyDriverBase $tidy,
	WANObjectCache $wanCache,
	UserOptionsLookup $userOptionsLookup,
	UserFactory $userFactory,
	TitleFormatter $titleFormatter,
	HttpRequestFactory $httpRequestFactory,
	TrackingCategories $trackingCategories,
	SignatureValidatorFactory $signatureValidatorFactory,
	UserNameUtils $userNameUtils
) {
	$this->deprecateDynamicPropertiesAccess( '1.42', __CLASS__ );
	$this->deprecatePublicProperty( 'ot', '1.35', __CLASS__ );
	$this->deprecatePublicProperty( 'mTitle', '1.35', __CLASS__ );
	$this->deprecatePublicProperty( 'mOptions', '1.35', __CLASS__ );

	// NOTE(review): the guard condition was missing from the listing (only the
	// throw and the closing brace survived, making the throw unconditional).
	// Restored as the ParserFactory construction-depth check; confirm against
	// ParserFactory before merging.
	if ( ParserFactory::$inParserFactory === 0 ) {
		// Direct construction of Parser was deprecated in 1.34 and
		// removed in 1.36; use a ParserFactory instead.
		throw new BadMethodCallException( 'Direct construction of Parser not allowed' );
	}
	$svcOptions->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
	$this->svcOptions = $svcOptions;

	$this->urlUtils = $urlUtils;
	// Matches "[<protocol><address><url chars> <optional label>]"
	$this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->urlUtils->validProtocols() . ')' .
		self::EXT_LINK_ADDR .
		self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*)\]/Su';

	$this->magicWordFactory = $magicWordFactory;

	$this->contLang = $contLang;

	$this->factory = $factory;
	$this->specialPageFactory = $spFactory;
	$this->linkRendererFactory = $linkRendererFactory;
	$this->nsInfo = $nsInfo;
	$this->logger = $logger;
	$this->badFileLookup = $badFileLookup;

	$this->languageConverterFactory = $languageConverterFactory;
	$this->languageNameUtils = $languageNameUtils;

	$this->hookContainer = $hookContainer;
	$this->hookRunner = new HookRunner( $hookContainer );

	$this->tidy = $tidy;

	$this->wanCache = $wanCache;
	$this->mPreprocessor = new Preprocessor_Hash(
		$this,
		$this->wanCache,
		[
			'cacheThreshold' => $svcOptions->get( MainConfigNames::PreprocessorCacheThreshold ),
			'disableLangConversion' => $languageConverterFactory->isConversionDisabled(),
		]
	);

	$this->userOptionsLookup = $userOptionsLookup;
	$this->userFactory = $userFactory;
	$this->titleFormatter = $titleFormatter;
	$this->httpRequestFactory = $httpRequestFactory;
	$this->trackingCategories = $trackingCategories;
	$this->signatureValidatorFactory = $signatureValidatorFactory;
	$this->userNameUtils = $userNameUtils;

	// These steps used to be done in "::firstCallInit()"
	// (if you're chasing a reference from some old code)
	CoreParserFunctions::register(
		$this,
		new ServiceOptions( CoreParserFunctions::REGISTER_OPTIONS, $svcOptions )
	);
	// NOTE(review): the call head and options argument were missing from the
	// listing (only "$this," and ");" survived). Restored to mirror the
	// CoreParserFunctions registration above; confirm CoreTagHooks::REGISTER_OPTIONS.
	CoreTagHooks::register(
		$this,
		new ServiceOptions( CoreTagHooks::REGISTER_OPTIONS, $svcOptions )
	);
	$this->initializeVariables();

	$this->hookRunner->onParserFirstCallInit( $this );
	// Placeholder title; getPage() treats 'Badtitle/Missing' as "no title set".
	$this->mTitle = Title::makeTitle( NS_SPECIAL, 'Badtitle/Missing' );
}
566
/**
 * Unset the link holder array (when it was ever initialised) and then every
 * remaining property reachable by iterating over the object.
 * Presumably this is meant to break reference cycles early; verify before
 * relying on it.
 */
public function __destruct() {
	// @phan-suppress-next-line PhanRedundantCondition Typed property not set in constructor, may be uninitialized
	if ( isset( $this->mLinkHolders ) ) {
		// @phan-suppress-next-line PhanTypeObjectUnsetDeclaredProperty
		unset( $this->mLinkHolders );
	}

	// @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach
	foreach ( $this as $propertyName => $ignored ) {
		unset( $this->$propertyName );
	}
}
581
/**
 * Fix up the copy after cloning: mark it as not parsing, detach fields that
 * may have been turned into PHP references, give it its own preprocessor
 * bound to the clone, and notify the ParserCloned hook.
 */
public function __clone() {
	$this->mInParse = false;

	// T58226: Taking a reference "to" an object field turns the field itself
	// into a reference (until the other reference goes out of scope), and
	// cloning copies reference fields as references. Both are defined PHP5
	// behaviors, inconvenient as they are when old PHP4-era hooks pass
	// fields by reference.
	foreach ( [ 'mStripState', 'mVarCache' ] as $fieldName ) {
		// Take a plain (non-reference) copy of the value and rebind the
		// field so that it references that fresh copy instead.
		$detached = $this->$fieldName;
		$this->$fieldName =& $detached;
		unset( $detached );
	}

	$this->mPreprocessor = clone $this->mPreprocessor;
	$this->mPreprocessor->resetParser( $this );

	$this->hookRunner->onParserCloned( $this );
}
607
/**
 * No-op kept for callers that still invoke it.
 */
public function firstCallInit() {
	// Intentionally empty: this method no longer does anything and should
	// be hard-deprecated once the remaining calls are removed.
}
621
/**
 * Reset all per-parse state: a fresh ParserOutput, link holders, strip
 * state and profiler, and zeroed/emptied counters and caches. Finishes by
 * running the ParserClearState hook.
 */
public function clearState() {
	$this->resetOutput();
	$this->mAutonumber = 0;
	$this->mLinkHolders = new LinkHolderArray(
		$this,
		$this->getContentLanguageConverter(),
		$this->getHookContainer()
	);
	$this->mLinkID = 0;

	// Forget everything known about the revision being parsed
	$this->mRevisionTimestamp = null;
	$this->mRevisionId = null;
	$this->mRevisionUser = null;
	$this->mRevisionSize = null;
	$this->mRevisionRecordObject = null;

	$this->mVarCache = [];
	$this->mUser = null;
	$this->mLangLinkLanguages = [];
	$this->currentRevisionCache = null;

	$this->mStripState = new StripState( $this );

	# Clear these on every parse, T6549
	$this->mTplRedirCache = [];
	$this->mTplDomCache = [];

	$this->mShowToc = true;
	$this->mForceTocPosition = false;
	$this->mIncludeSizes = [ 'post-expand' => 0, 'arg' => 0 ];

	// Limit counters
	$this->mPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;
	$this->mHeadings = [];
	$this->mDoubleUnderscores = [];
	$this->mExpensiveFunctionCount = 0;

	$this->mProfiler = new SectionProfiler();

	$this->hookRunner->onParserClearState( $this );
}
668
/**
 * Replace the current output with a new ParserOutput and register its
 * recordOption() method as a watcher on the current parser options.
 */
public function resetOutput() {
	$freshOutput = new ParserOutput();
	$this->mOutput = $freshOutput;
	$this->mOptions->registerWatcher( [ $freshOutput, 'recordOption' ] );
}
677
/**
 * Convert wikitext to HTML and return the populated ParserOutput.
 *
 * The revision-related fields are saved before parsing and restored
 * afterwards, so a $revid override only applies for the duration of this
 * call.
 *
 * @param string $text Wikitext to parse
 * @param PageReference $page Page the text belongs to
 * @param ParserOptions $options
 * @param bool $linestart Passed through to internalParseHalfParsed()
 * @param bool $clearState When true, the parser is locked for this call,
 *   U+007F is replaced in the input, and true is passed on to startParse()
 * @param int|null $revid Revision ID to assume while parsing, or null to
 *   keep the currently stored revision fields
 * @return ParserOutput
 */
public function parse(
	$text, PageReference $page, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	if ( $clearState ) {
		// We use U+007F DELETE to construct strip markers, so we have to make
		// sure that this character does not occur in the input text.
		$text = strtr( $text, "\x7f", "?" );
		// Held (not read) so the lock lives until this method returns.
		$magicScopeVariable = $this->lock();
	}
	// Strip U+0000 NULL (T159174)
	$text = str_replace( "\000", '', $text );

	$this->startParse( $page, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	$this->mOutput->resetParseStartTime();

	// Remember the revision fields so they can be restored before returning.
	$oldRevisionId = $this->mRevisionId;
	$oldRevisionRecordObject = $this->mRevisionRecordObject;
	$oldRevisionTimestamp = $this->mRevisionTimestamp;
	$oldRevisionUser = $this->mRevisionUser;
	$oldRevisionSize = $this->mRevisionSize;
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
		$this->mRevisionRecordObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	$text = $this->internalParse( $text );
	$this->hookRunner->onParserAfterParse( $this, $text, $this->mStripState );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	// Only derive a title text when conversion is enabled, neither
	// __NOCONTENTCONVERT__ nor __NOTITLECONVERT__ was seen, and no display
	// title has been set on the output.
	if ( !$options->getDisableTitleConversion()
		&& !isset( $this->mDoubleUnderscores['nocontentconvert'] )
		&& !isset( $this->mDoubleUnderscores['notitleconvert'] )
		&& $this->mOutput->getDisplayTitle() === false
	) {
		$titleText = $this->getTargetLanguageConverter()->getConvRuleTitle();
		if ( $titleText !== false ) {
			$titleText = Sanitizer::removeSomeTags( $titleText );
		} else {
			[ $nsText, $nsSeparator, $mainText ] = $this->getTargetLanguageConverter()->convertSplitTitle( $page );
			// In the future, those three pieces could be stored separately rather than joined into $titleText,
			// and OutputPage would format them and join them together, to resolve T314399.
			$titleText = self::formatPageTitle( $nsText, $nsSeparator, $mainText );
		}
		$this->mOutput->setTitleText( $titleText );
	}

	# Recording timing info. Must be called before finalizeAdaptiveCacheExpiry() and
	# makeLimitReport(), which make use of the timing info.
	$this->mOutput->recordTimeProfile();

	# Compute runtime adaptive expiry if set
	$this->mOutput->finalizeAdaptiveCacheExpiry();

	# Warn if too many heavyweight parser functions were used
	if ( $this->mExpensiveFunctionCount > $options->getExpensiveParserFunctionLimit() ) {
		// The limit argument had been lost from this call, leaving only the
		// count and a dangling comma; the warning needs both numbers.
		$this->limitationWarn( 'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$options->getExpensiveParserFunctionLimit()
		);
	}

	# Information on limits, for the benefit of users who try to skirt them
	if ( $this->svcOptions->get( MainConfigNames::EnableParserLimitReporting ) ) {
		$this->makeLimitReport();
	}

	$this->mOutput->setFromParserOptions( $options );

	$this->mOutput->setRawText( $text );

	// Restore the state saved above.
	$this->mRevisionId = $oldRevisionId;
	$this->mRevisionRecordObject = $oldRevisionRecordObject;
	$this->mRevisionTimestamp = $oldRevisionTimestamp;
	$this->mRevisionUser = $oldRevisionUser;
	$this->mRevisionSize = $oldRevisionSize;
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
791
/**
 * Record limit-report and cache-report data on the current ParserOutput:
 * timings, preprocessor/include/expansion/expensive-function counters with
 * their limits, strip-state data, the ten slowest profiled functions and
 * cache metadata.
 */
protected function makeLimitReport() {
	$maxIncludeSize = $this->mOptions->getMaxIncludeSize();

	// CPU time is only reported when a value is available
	$cpuTime = $this->mOutput->getTimeProfile( 'cpu' );
	if ( $cpuTime !== null ) {
		$this->mOutput->setLimitReportData( 'limitreport-cputime', sprintf( "%.3f", $cpuTime ) );
	}

	$wallTime = $this->mOutput->getTimeProfile( 'wall' );
	$this->mOutput->setLimitReportData( 'limitreport-walltime', sprintf( "%.3f", $wallTime ) );

	// Each of the following is a [ current value, limit ] pair
	$this->mOutput->setLimitReportData(
		'limitreport-ppvisitednodes',
		[ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ]
	);
	$this->mOutput->setLimitReportData(
		'limitreport-postexpandincludesize',
		[ $this->mIncludeSizes['post-expand'], $maxIncludeSize ]
	);
	$this->mOutput->setLimitReportData(
		'limitreport-templateargumentsize',
		[ $this->mIncludeSizes['arg'], $maxIncludeSize ]
	);
	$this->mOutput->setLimitReportData(
		'limitreport-expansiondepth',
		[ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ]
	);
	$this->mOutput->setLimitReportData(
		'limitreport-expensivefunctioncount',
		[ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ]
	);

	foreach ( $this->mStripState->getLimitReport() as [ $reportKey, $reportValue ] ) {
		$this->mOutput->setLimitReportData( $reportKey, $reportValue );
	}

	$this->hookRunner->onParserLimitReportPrepare( $this, $this->mOutput );

	// Add on template profiling data in human/machine readable way:
	// slowest first, top ten only.
	$statsByFunction = $this->mProfiler->getFunctionStats();
	uasort(
		$statsByFunction,
		static fn ( $left, $right ) => $right['real'] <=> $left['real']
	);
	$profileReport = array_values( array_map(
		static function ( $stat ) {
			return sprintf(
				"%6.2f%% %8.3f %6d %s",
				$stat['%real'],
				$stat['real'],
				$stat['calls'],
				htmlspecialchars( $stat['name'] )
			);
		},
		array_slice( $statsByFunction, 0, 10 )
	) );
	$this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );

	// Add other cache related metadata
	if ( $this->svcOptions->get( MainConfigNames::ShowHostnames ) ) {
		$this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
	}
	$this->mOutput->setLimitReportData( 'cachereport-timestamp', $this->mOutput->getCacheTime() );
	$this->mOutput->setLimitReportData( 'cachereport-ttl', $this->mOutput->getCacheExpiry() );
	$this->mOutput->setLimitReportData(
		'cachereport-transientcontent',
		$this->mOutput->hasReducedExpiry()
	);
}
857
/**
 * Run internalParse() on a fragment of wikitext with $isMain set to false.
 * The result has not been through internalParseHalfParsed(); see
 * recursiveTagParseFully() for that.
 *
 * @param string $text
 * @param PPFrame|false $frame Frame to expand in, or false for none
 * @return string
 */
public function recursiveTagParse( $text, $frame = false ) {
	return $this->internalParse( $text, false, $frame );
}
887
/**
 * Like recursiveTagParse(), but additionally feeds the result through
 * internalParseHalfParsed() with its second argument set to false.
 *
 * @param string $text
 * @param PPFrame|false $frame Frame to expand in, or false for none
 * @return string
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	$halfParsed = $this->recursiveTagParse( $text, $frame );
	return $this->internalParseHalfParsed( $halfParsed, false );
}
912
/**
 * Parse extension-tag content the way parse() treats a whole document:
 * recursiveTagParse(), then the ParserAfterParse hook, then
 * internalParseHalfParsed() with its second argument set to true.
 *
 * @param string $text
 * @return string
 */
public function parseExtensionTagAsTopLevelDoc( $text ) {
	$text = $this->recursiveTagParse( $text );
	// $text is handed to the hook as a variable so a handler can alter it
	$this->hookRunner->onParserAfterParse( $this, $text, $this->mStripState );
	return $this->internalParseHalfParsed( $text, true );
}
938
/**
 * Expand variables and templates in wikitext (output type OT_PREPROCESS)
 * and unstrip the result, after resetting the parser state.
 *
 * @param string $text
 * @param PageReference|null $page
 * @param ParserOptions $options
 * @param int|null $revid Revision ID to assume, or null to leave it untouched
 * @param PPFrame|false $frame Passed through to replaceVariables()
 * @return string
 */
public function preprocess(
	$text,
	?PageReference $page,
	ParserOptions $options,
	$revid = null,
	$frame = false
) {
	// Never read; keeps the lock alive until the method returns
	$magicScopeVariable = $this->lock();
	$this->startParse( $page, $options, self::OT_PREPROCESS, true );

	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}

	$this->hookRunner->onParserBeforePreprocess( $this, $text, $this->mStripState );

	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
968
/**
 * Expand variables in $text via replaceVariables() and unstrip the result,
 * without locking or resetting the parser.
 *
 * @param string $text
 * @param PPFrame|false $frame Passed through to replaceVariables()
 * @return string
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
983
/**
 * Produce preload text: substitute $params into $text via a RawMessage,
 * then expand the inclusion-mode DOM with argument and template expansion
 * disabled, and unstrip the result.
 *
 * @param string $text
 * @param PageReference $page
 * @param ParserOptions $options
 * @param array $params Message parameters substituted into $text
 * @return string
 */
public function getPreloadText( $text, PageReference $page, ParserOptions $options, $params = [] ) {
	$text = ( new RawMessage( $text ) )->params( $params )->plain();

	# Parser (re)initialisation
	$magicScopeVariable = $this->lock();
	$this->startParse( $page, $options, self::OT_PLAIN, true );

	$dom = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
	$expanded = $this->getPreprocessor()->newFrame()->expand(
		$dom,
		PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES
	);

	return $this->mStripState->unstripBoth( $expanded );
}
1012
/**
 * Set the user that getUserIdentity() reports. Passing null makes
 * getUserIdentity() fall back to the user of the parser options.
 *
 * @param UserIdentity|null $user
 */
public function setUser( ?UserIdentity $user ) {
	$this->mUser = $user;
}
1023
/**
 * Set the context title; thin wrapper around setPage().
 *
 * The parameter is declared explicitly nullable: relying on a null default
 * to make a typed parameter nullable is deprecated as of PHP 8.4.
 *
 * @param Title|null $t Title to use, or null for setPage()'s placeholder
 */
public function setTitle( ?Title $t = null ) {
	$this->setPage( $t );
}
1034
/**
 * Get the Title currently stored on the parser.
 *
 * @return Title
 */
public function getTitle(): Title {
	return $this->mTitle;
}
1043
/**
 * Set the page used as parsing context.
 *
 * Null is replaced by the Special:Badtitle/Parser placeholder; anything
 * else is converted to a Title. A fragment, if present, is stripped.
 *
 * @param PageReference|null $t
 */
public function setPage( ?PageReference $t = null ) {
	// For now (early 1.37 alpha), always convert to Title, so we don't have
	// to do it over and over again in other methods. Eventually, we will no
	// longer need to have a Title instance internally.
	$title = $t
		? Title::newFromPageReference( $t )
		: Title::makeTitle( NS_SPECIAL, 'Badtitle/Parser' );

	# Strip the fragment to avoid various odd effects
	$this->mTitle = $title->hasFragment()
		? $title->createFragmentTarget( '' )
		: $title;
}
1067
/**
 * Get the page used as parsing context.
 *
 * When the stored title is the Special:Badtitle/Missing placeholder (the
 * value assigned by the constructor), a deprecation warning is emitted and
 * null is returned.
 *
 * @return PageReference|null
 */
public function getPage(): ?PageReference {
	if ( !$this->mTitle->isSpecial( 'Badtitle' ) ) {
		return $this->mTitle;
	}

	[ , $subPage ] = $this->specialPageFactory->resolveAlias( $this->mTitle->getDBkey() );
	if ( $subPage !== 'Missing' ) {
		return $this->mTitle;
	}

	wfDeprecated( __METHOD__ . ' without a Title set', '1.34' );
	return null;
}
1085
/**
 * Get the current output type (one of the self::OT_* values passed to
 * setOutputType()).
 *
 * @return int
 */
public function getOutputType(): int {
	return $this->mOutputType;
}
1094
/**
 * Set the output type and rebuild the 'ot' shortcut array, which holds one
 * boolean flag per output type.
 *
 * @param int $ot One of the self::OT_* constants
 */
public function setOutputType( $ot ): void {
	$this->mOutputType = $ot;

	# Shortcut alias
	$flags = [];
	$flags['html'] = ( $ot == self::OT_HTML );
	$flags['wiki'] = ( $ot == self::OT_WIKI );
	$flags['pre'] = ( $ot == self::OT_PREPROCESS );
	$flags['plain'] = ( $ot == self::OT_PLAIN );
	$this->ot = $flags;
}
1110
/**
 * Deprecated accessor/mutator for the output type, implemented through
 * wfSetVar(). Unlike setOutputType(), it does not touch the 'ot' array.
 *
 * @deprecated since 1.35
 * @param int|null $x Forwarded to wfSetVar()
 * @return mixed Whatever wfSetVar() returns
 */
public function OutputType( $x = null ) {
	wfDeprecated( __METHOD__, '1.35' );

	$result = wfSetVar( $this->mOutputType, $x );
	return $result;
}
1122
/**
 * Get the ParserOutput currently being built.
 *
 * Calling this before any output has been created is deprecated; in that
 * case a deprecation warning is emitted and null is returned.
 *
 * @return ParserOutput
 */
public function getOutput() {
	// @phan-suppress-next-line PhanRedundantCondition False positive, see https://github.com/phan/phan/issues/4720
	if ( isset( $this->mOutput ) ) {
		return $this->mOutput;
	}

	wfDeprecated( __METHOD__ . ' before initialization', '1.42' );
	// @phan-suppress-next-line PhanTypeMismatchReturnProbablyReal We don’t want to tell anyone we’re doing this
	return null;
}
1136
/**
 * Get the parser options stored on this parser.
 *
 * @return ParserOptions|null Presumably null until options have been set; verify
 */
public function getOptions() {
	return $this->mOptions;
}
1144
/**
 * Replace the parser options stored on this parser.
 *
 * @param ParserOptions $options
 */
public function setOptions( ParserOptions $options ): void {
	$this->mOptions = $options;
}
1153
/**
 * Deprecated accessor/mutator for the parser options, implemented through
 * wfSetVar().
 *
 * @deprecated since 1.35
 * @param ParserOptions|null $x Forwarded to wfSetVar()
 * @return mixed Whatever wfSetVar() returns
 */
public function Options( $x = null ) {
	wfDeprecated( __METHOD__, '1.35' );

	$result = wfSetVar( $this->mOptions, $x );
	return $result;
}
1165
/**
 * Hand out the current link ID and advance the counter by one.
 *
 * @return int The ID as it was before the increment
 */
public function nextLinkID() {
	$current = $this->mLinkID;
	$this->mLinkID++;
	return $current;
}
1173
/**
 * Set the value that the next call to nextLinkID() will return.
 *
 * @param int $id
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
1181
/**
 * Deprecated alias of getTargetLanguage().
 *
 * @deprecated since 1.40
 * @return Language
 */
public function getFunctionLang() {
	wfDeprecated( __METHOD__, '1.40' );

	$language = $this->getTargetLanguage();
	return $language;
}
1192
/**
 * Work out the language the output is being produced in.
 *
 * Order of precedence: an explicit target language from the options, then
 * the user language when parsing an interface message, then the page
 * language of the current title.
 *
 * @return Language
 */
public function getTargetLanguage() {
	$options = $this->mOptions;

	$explicitTarget = $options->getTargetLanguage();
	if ( $explicitTarget !== null ) {
		return $explicitTarget;
	}

	if ( $options->getInterfaceMessage() ) {
		return $options->getUserLangObj();
	}

	return $this->getTitle()->getPageLanguage();
}
1212
/**
 * Get the user on whose behalf the parse runs: the one given to setUser()
 * if any, otherwise the user of the parser options.
 *
 * @return UserIdentity
 */
public function getUserIdentity(): UserIdentity {
	// isset() rather than a null comparison: it is also safe while the
	// typed property has not been initialised yet.
	if ( isset( $this->mUser ) ) {
		return $this->mUser;
	}

	return $this->getOptions()->getUserIdentity();
}
1223
/**
 * Get the preprocessor instance owned by this parser.
 *
 * @return Preprocessor
 */
public function getPreprocessor() {
	return $this->mPreprocessor;
}
1233
/**
 * Get the LinkRenderer, creating it from the factory on first use and
 * reusing the same instance afterwards.
 *
 * @return LinkRenderer
 */
public function getLinkRenderer() {
	// XXX We make the LinkRenderer with current options and then cache it forever
	$this->mLinkRenderer ??= $this->linkRendererFactory->create();

	return $this->mLinkRenderer;
}
1248
/**
 * Get the MagicWordFactory injected into this parser.
 *
 * @return MagicWordFactory
 */
public function getMagicWordFactory() {
	return $this->magicWordFactory;
}
1258
/**
 * Get the content Language object injected into this parser.
 *
 * @return Language
 */
public function getContentLanguage() {
	return $this->contLang;
}
1268
/**
 * Get the BadFileLookup service injected into this parser.
 *
 * @return BadFileLookup
 */
public function getBadFileLookup() {
	return $this->badFileLookup;
}
1278
/**
 * Replace every occurrence of the listed XML-style elements, and every HTML
 * comment, in $text with a unique marker, collecting what was removed.
 *
 * Element names are matched case-insensitively. An element without a
 * closing tag, or a comment without "-->", extends to the end of the text.
 *
 * @param string[] $elements Element names; they are joined with '|' and
 *   interpolated into a regular expression as-is (no quoting is applied)
 * @param string $text Input text
 * @param array[] &$matches Overwritten with a map of marker => [ element name
 *   as written, inner content (null for a self-closing tag), attributes as
 *   decoded by Sanitizer::decodeTagAttributes(), the complete original text
 *   of the element ]
 * @return string $text with each extracted element replaced by its marker
 */
1298 public static function extractTagsAndParams( array $elements, $text, &$matches ) {
// Counter used in marker names; static, so it keeps counting across calls.
1299 static $n = 1;
1300 $stripped = '';
1301 $matches = [];
1302
1303 $taglist = implode( '|', $elements );
// Groups 1-3: element name, attribute text, ">" or "/>". Group 4: "!--" for a comment.
1304 $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";
1305
1306 while ( $text != '' ) {
// Split at the first opening tag or comment only (limit 2), keeping the captured groups.
1307 $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
1308 $stripped .= $p[0];
// Fewer than 5 pieces: nothing matched, the remainder is plain text.
1309 if ( count( $p ) < 5 ) {
1310 break;
1311 }
// More than 5 pieces: the comment alternative (group 4) matched.
1312 if ( count( $p ) > 5 ) {
1313 # comment
1314 $element = $p[4];
1315 $attributes = '';
1316 $close = '';
1317 $inside = $p[5];
1318 } else {
1319 # tag
1320 [ , $element, $attributes, $close, $inside ] = $p;
1321 }
1322
1323 $marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
1324 $stripped .= $marker;
1325
1326 if ( $close === '/>' ) {
1327 # Empty element tag, <tag />
1328 $content = null;
1329 $text = $inside;
1330 $tail = null;
1331 } else {
// Look for the matching terminator: "-->" for comments, "</name>" otherwise.
1332 if ( $element === '!--' ) {
1333 $end = '/(-->)/';
1334 } else {
1335 $end = "/(<\\/$element\\s*>)/i";
1336 }
1337 $q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
1338 $content = $q[0];
1339 if ( count( $q ) < 3 ) {
1340 # No end tag -- let it run out to the end of the text.
1341 $tail = '';
1342 $text = '';
1343 } else {
// $tail is the terminator as written; $text is what still has to be scanned.
1344 [ , $tail, $text ] = $q;
1345 }
1346 }
1347
1348 $matches[$marker] = [ $element,
1349 $content,
1350 Sanitizer::decodeTagAttributes( $attributes ),
1351 "<$element$attributes$close$content$tail" ];
1352 }
1353 return $stripped;
1354 }
1355
/**
 * Get the strip list stored on this parser.
 *
 * @return array
 */
public function getStripList() {
	return $this->mStripList;
}
1364
/**
 * Get the StripState of the current parse.
 *
 * @return StripState
 */
public function getStripState() {
	return $this->mStripState;
}
1372
/**
 * Register $text in the strip state under a newly generated "item" marker
 * and return that marker. The marker index is advanced by one per call.
 *
 * @param string $text Text stored via StripState::addGeneral()
 * @return string The marker that now stands for $text
 */
public function insertStripItem( $text ) {
	$index = $this->mMarkerIndex;
	$this->mMarkerIndex++;

	$marker = self::MARKER_PREFIX . "-item-{$index}-" . self::MARKER_SUFFIX;
	$this->mStripState->addGeneral( $marker, $text );

	return $marker;
}
1388
/**
 * Convert wikitext table markup into HTML table markup, line by line.
 *
 * Recognised line starts (after trimming): "{|" (optionally preceded by
 * colons for indentation) opens a table, "|}" closes one, "|-" starts a row,
 * "|+" is a caption, "|" a data cell and "!" a header cell. Lines outside
 * any table are passed through unchanged. Nested tables are tracked with
 * parallel stacks that hold one entry per open table. Tables still open at
 * the end of the input are closed.
 *
 * @param string $text
 * @return string Text with table markup converted; the empty string when
 *   the whole result is a single table holding only the filler row
 */
1395 private function handleTables( $text ) {
1396 $lines = StringUtils::explode( "\n", $text );
1397 $out = '';
// Parallel stacks, one entry per currently open table
1398 $td_history = []; # Is currently a td tag open?
1399 $last_tag_history = []; # Save history of last tag activated (td, th or caption)
1400 $tr_history = []; # Is currently a tr tag open?
1401 $tr_attributes = []; # history of tr attributes
1402 $has_opened_tr = []; # Did this table open a <tr> element?
1403 $indent_level = 0; # indent level of the table
1404
1405 foreach ( $lines as $outLine ) {
1406 $line = trim( $outLine );
1407
1408 if ( $line === '' ) { # empty line, go to next line
1409 $out .= $outLine . "\n";
1410 continue;
1411 }
1412
1413 $first_character = $line[0];
1414 $first_two = substr( $line, 0, 2 );
1415 $matches = [];
1416
1417 if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
1418 # First check if we are starting a new table
// One <dl><dd> wrapper is emitted per leading colon
1419 $indent_level = strlen( $matches[1] );
1420
1421 $attributes = $this->mStripState->unstripBoth( $matches[2] );
1422 $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );
1423
1424 $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
// Push a fresh entry for the new table onto every stack
1425 $td_history[] = false;
1426 $last_tag_history[] = '';
1427 $tr_history[] = false;
1428 $tr_attributes[] = '';
1429 $has_opened_tr[] = false;
1430 } elseif ( count( $td_history ) == 0 ) {
1431 # Don't do any of the following
// Not inside any table: emit the line untouched
1432 $out .= $outLine . "\n";
1433 continue;
1434 } elseif ( $first_two === '|}' ) {
1435 # We are ending a table
1436 $line = '</table>' . substr( $line, 2 );
1437 $last_tag = array_pop( $last_tag_history );
1438
// A table that never opened a row gets a filler row with one empty cell
1439 if ( !array_pop( $has_opened_tr ) ) {
1440 $line = "<tr><td></td></tr>{$line}";
1441 }
1442
1443 if ( array_pop( $tr_history ) ) {
1444 $line = "</tr>{$line}";
1445 }
1446
1447 if ( array_pop( $td_history ) ) {
1448 $line = "</{$last_tag}>{$line}";
1449 }
1450 array_pop( $tr_attributes );
1451 if ( $indent_level > 0 ) {
1452 $outLine = rtrim( $line ) . str_repeat( '</dd></dl>', $indent_level );
1453 } else {
1454 $outLine = $line;
1455 }
1456 } elseif ( $first_two === '|-' ) {
1457 # Now we have a table row
1458 $line = preg_replace( '#^\|-+#', '', $line );
1459
1460 # What's after the tag is now only attributes
1461 $attributes = $this->mStripState->unstripBoth( $line );
1462 $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
// Remember the attributes; the <tr> itself is only written once a cell follows
1463 array_pop( $tr_attributes );
1464 $tr_attributes[] = $attributes;
1465
1466 $line = '';
1467 $last_tag = array_pop( $last_tag_history );
1468 array_pop( $has_opened_tr );
1469 $has_opened_tr[] = true;
1470
1471 if ( array_pop( $tr_history ) ) {
1472 $line = '</tr>';
1473 }
1474
1475 if ( array_pop( $td_history ) ) {
1476 $line = "</{$last_tag}>{$line}";
1477 }
1478
1479 $outLine = $line;
1480 $tr_history[] = false;
1481 $td_history[] = false;
1482 $last_tag_history[] = '';
1483 } elseif ( $first_character === '|'
1484 || $first_character === '!'
1485 || $first_two === '|+'
1486 ) {
1487 # This might be cell elements, td, th or captions
1488 if ( $first_two === '|+' ) {
1489 $first_character = '+';
1490 $line = substr( $line, 2 );
1491 } else {
1492 $line = substr( $line, 1 );
1493 }
1494
1495 // Implies both are valid for table headings.
1496 if ( $first_character === '!' ) {
1497 $line = StringUtils::replaceMarkup( '!!', '||', $line );
1498 }
1499
1500 # Split up multiple cells on the same line.
1501 # FIXME : This can result in improper nesting of tags processed
1502 # by earlier parser steps.
1503 $cells = explode( '||', $line );
1504
1505 $outLine = '';
1506
1507 # Loop through each table cell
1508 foreach ( $cells as $cell ) {
1509 $previous = '';
// Captions do not open a row; td and th cells do
1510 if ( $first_character !== '+' ) {
1511 $tr_after = array_pop( $tr_attributes );
1512 if ( !array_pop( $tr_history ) ) {
1513 $previous = "<tr{$tr_after}>\n";
1514 }
1515 $tr_history[] = true;
1516 $tr_attributes[] = '';
1517 array_pop( $has_opened_tr );
1518 $has_opened_tr[] = true;
1519 }
1520
1521 $last_tag = array_pop( $last_tag_history );
1522
// Close the previously open cell before any newly opened row
1523 if ( array_pop( $td_history ) ) {
1524 $previous = "</{$last_tag}>\n{$previous}";
1525 }
1526
1527 if ( $first_character === '|' ) {
1528 $last_tag = 'td';
1529 } elseif ( $first_character === '!' ) {
1530 $last_tag = 'th';
1531 } elseif ( $first_character === '+' ) {
1532 $last_tag = 'caption';
1533 } else {
1534 $last_tag = '';
1535 }
1536
1537 $last_tag_history[] = $last_tag;
1538
1539 # A cell could contain both parameters and data
1540 $cell_data = explode( '|', $cell, 2 );
1541
1542 # T2553: Note that a '|' inside an invalid link should not
1543 # be mistaken as delimiting cell parameters
1544 # Bug T153140: Neither should language converter markup.
1545 if ( preg_match( '/\[\[|-\{/', $cell_data[0] ) === 1 ) {
1546 $cell = "{$previous}<{$last_tag}>" . trim( $cell );
1547 } elseif ( count( $cell_data ) == 1 ) {
1548 // Whitespace in cells is trimmed
1549 $cell = "{$previous}<{$last_tag}>" . trim( $cell_data[0] );
1550 } else {
1551 $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
1552 $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
1553 // Whitespace in cells is trimmed
1554 $cell = "{$previous}<{$last_tag}{$attributes}>" . trim( $cell_data[1] );
1555 }
1556
1557 $outLine .= $cell;
1558 $td_history[] = true;
1559 }
1560 }
1561 $out .= $outLine . "\n";
1562 }
1563
1564 # Closing open td, tr && table
// NOTE(review): an open cell is always closed with </td> here, even when the
// last open cell was a <th> or <caption>; $last_tag_history is not consulted.
1565 while ( count( $td_history ) > 0 ) {
1566 if ( array_pop( $td_history ) ) {
1567 $out .= "</td>\n";
1568 }
1569 if ( array_pop( $tr_history ) ) {
1570 $out .= "</tr>\n";
1571 }
1572 if ( !array_pop( $has_opened_tr ) ) {
1573 $out .= "<tr><td></td></tr>\n";
1574 }
1575
1576 $out .= "</table>\n";
1577 }
1578
1579 # Remove trailing line-ending (b/c)
1580 if ( substr( $out, -1 ) === "\n" ) {
1581 $out = substr( $out, 0, -1 );
1582 }
1583
1584 # special case: don't return empty table
1585 if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
1586 $out = '';
1587 }
1588
1589 return $out;
1590 }
1591
/**
 * Run the main wikitext-to-HTML pipeline on $text: variable/template
 * expansion, HTML tag sanitising, tables, horizontal rules, double
 * underscores, headings, internal links, quotes, external links, magic
 * links and finally heading finalisation. The order of the steps matters.
 *
 * A ParserBeforeInternalParse hook handler that returns false stops
 * processing; the text (as possibly modified by the hook) is then returned
 * as-is.
 *
 * @param string $text
 * @param bool $isMain Passed through to finalizeHeadings()
 * @param PPFrame|false $frame Frame used for expansion, or false to use
 *   replaceVariables() without a frame
 * @return string
 */
1605 public function internalParse( $text, $isMain = true, $frame = false ) {
// Kept unmodified for finalizeHeadings() at the end
1606 $origText = $text;
1607
1608 # Hook to suspend the parser in this state
1609 if ( !$this->hookRunner->onParserBeforeInternalParse( $this, $text, $this->mStripState ) ) {
1610 return $text;
1611 }
1612
1613 # if $frame is provided, then use $frame for replacing any variables
1614 if ( $frame ) {
1615 # use frame depth to infer how include/noinclude tags should be handled
1616 # depth=0 means this is the top-level document; otherwise it's an included document
1617 if ( !$frame->depth ) {
1618 $flag = 0;
1619 } else {
1620 $flag = Preprocessor::DOM_FOR_INCLUSION;
1621 }
1622 $dom = $this->preprocessToDom( $text, $flag );
1623 $text = $frame->expand( $dom );
1624 } else {
1625 # if $frame is not provided, then use old-style replaceVariables
1626 $text = $this->replaceVariables( $text );
1627 }
1628
1629 $text = Sanitizer::internalRemoveHtmlTags(
1630 $text,
1631 // Callback from the Sanitizer for expanding items found in
1632 // HTML attribute values, so they can be safely tested and escaped.
// The callback's $text and $frame are its own parameters and shadow the
// method's variables; $text is modified in place through the reference.
1633 function ( &$text, $frame = false ) {
1634 $text = $this->replaceVariables( $text, $frame );
1635 $text = $this->mStripState->unstripBoth( $text );
1636 },
1637 false,
1638 [],
1639 []
1640 );
1641 $this->hookRunner->onInternalParseBeforeLinks( $this, $text, $this->mStripState );
1642
1643 # Tables need to come after variable replacement for things to work
1644 # properly; putting them before other transformations should keep
1645 # exciting things like link expansions from showing up in surprising
1646 # places.
1647 $text = $this->handleTables( $text );
1648
// Four or more dashes at the start of a line become a horizontal rule
1649 $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
1650
1651 $text = $this->handleDoubleUnderscore( $text );
1652
1653 $text = $this->handleHeadings( $text );
1654 $text = $this->handleInternalLinks( $text );
1655 $text = $this->handleAllQuotes( $text );
1656 $text = $this->handleExternalLinks( $text );
1657
1658 # handleInternalLinks may sometimes leave behind
1659 # absolute URLs, which have to be masked to hide them from handleExternalLinks
// The masking marker is no longer needed at this point, so it is removed
1660 $text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );
1661
1662 $text = $this->handleMagicLinks( $text );
1663 $text = $this->finalizeHeadings( $text, $origText, $isMain );
1664
1665 return $text;
1666 }
1667
1675 return $this->languageConverterFactory->getLanguageConverter(
1676 $this->getTargetLanguage()
1677 );
1678 }
1679
/**
 * Look up the language converter for this parser's content language.
 *
 * The converter is obtained from the language converter factory, keyed by
 * whatever getContentLanguage() returns.
 *
 * @return ILanguageConverter
 */
private function getContentLanguageConverter(): ILanguageConverter {
	return $this->languageConverterFactory->getLanguageConverter(
		$this->getContentLanguage()
	);
}
1690
/**
 * Accessor for the hook container held by this parser.
 *
 * Protected, so it is available to subclasses as well as to this class.
 *
 * @return mixed The current value of $this->hookContainer
 */
protected function getHookContainer() {
	return $this->hookContainer;
}
1701
/**
 * Accessor for the hook runner held by this parser.
 *
 * Protected, so it is available to subclasses as well as to this class.
 *
 * @return mixed The current value of $this->hookRunner
 */
protected function getHookRunner() {
	return $this->hookRunner;
}
1713
/**
 * Second stage of the conversion: takes text that already went through the
 * link/quote/heading passes and finishes it (block levels, link holders,
 * language conversion, TOC localisation, unstripping and tidy).
 *
 * @param string $text Half-parsed text
 * @param bool $isMain When true, the ParserAfterTidy hook is run on the result
 * @param bool $linestart Passed through to BlockLevelPass::doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	$text = $this->mStripState->unstripGeneral( $text );
	$text = BlockLevelPass::doBlockLevels( $text, $linestart );
	$this->replaceLinkHoldersPrivate( $text );

	// Content conversion is skipped when the options disable it, when the
	// 'nocontentconvert' double-underscore is set, or for interface messages.
	$wantsConversion = !$this->mOptions->getDisableContentConversion()
		&& !isset( $this->mDoubleUnderscores['nocontentconvert'] )
		&& !$this->mOptions->getInterfaceMessage();

	$converter = null;
	if ( $wantsConversion ) {
		# Do not move the convert() call: it assumes that all links have
		# been replaced and that only the <nowiki> marks are left.
		$converter = $this->getTargetLanguageConverter();
		$text = $converter->convert( $text );
	}

	// The TOC is converted *after* the main text so that editor-defined
	// conversion rules (by convention placed at the start of the article)
	// are applied to it too. A null converter suppresses conversion.
	self::localizeTOC(
		$this->mOutput->getTOCData(),
		$this->getTargetLanguage(),
		$converter
	);

	if ( $converter === null ) {
		$this->mOutput->setLanguage( $this->getTargetLanguage() );
	} else {
		$this->mOutput->setLanguage( new Bcp47CodeValue(
			LanguageCode::bcp47( $converter->getPreferredVariant() )
		) );
	}

	$text = $this->mStripState->unstripGeneral(
		$this->mStripState->unstripNoWiki( $text )
	);

	$text = $this->tidy->tidy( $text, [ Sanitizer::class, 'armorFrenchSpaces' ] );

	if ( $isMain ) {
		$this->hookRunner->onParserAfterTidy( $this, $text );
	}

	return $text;
}
1777
/**
 * Find free external links and RFC / PMID / ISBN references in the text and
 * hand every match to magicLinkCallback() for replacement.
 *
 * Anchors and other HTML elements are matched too (groups 1 and 2) so that
 * nothing inside them is turned into a link.
 *
 * @param string $text
 * @return string The text with the callback's replacements applied
 */
private function handleMagicLinks( $text ) {
	$protocols = $this->urlUtils->validAbsoluteProtocols();
	$urlChar = self::EXT_LINK_URL_CLASS;
	$address = self::EXT_LINK_ADDR;
	$space = self::SPACE_NOT_NL; # non-newline space
	$dashOrSpace = "(?:-|$space)"; # a dash or a non-newline space
	$spaceRun = "$space++"; # possessive match of 1 or more spaces

	// The group numbers below are relied upon by magicLinkCallback().
	$pattern = '!(?: # Start cases
		(<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
		(<.*?>) | # m[2]: Skip stuff inside HTML elements' . "
		(\b # m[3]: Free external links
		(?i:$protocols)
		($address$urlChar*) # m[4]: Post-protocol path
		) |
		\b(?:RFC|PMID) $spaceRun # m[5]: RFC or PMID, capture number
		([0-9]+)\b |
		\bISBN $spaceRun ( # m[6]: ISBN, capture number
		(?: 97[89] $dashOrSpace? )? # optional 13-digit ISBN prefix
		(?: [0-9] $dashOrSpace? ){9} # 9 digits with opt. delimiters
		[0-9Xx] # check digit
		)\b
		)!xu";

	return preg_replace_callback( $pattern, [ $this, 'magicLinkCallback' ], $text );
}
1816
/**
 * Replacement callback for handleMagicLinks().
 *
 * @param array $m Match array: [1] anchor element, [2] other HTML element,
 *   [3] free external link, [4] its post-protocol part, [5] RFC/PMID
 *   number, [6] ISBN
 * @return string HTML to substitute for the match; the match itself when
 *   nothing is to be linked
 * @throws UnexpectedValueException If group 5 matched but the match starts
 *   with neither "RFC" nor "PMID"
 */
private function magicLinkCallback( array $m ) {
	$matched = $m[0];

	# Anchors and other HTML elements are passed through untouched
	if ( isset( $m[1] ) && $m[1] !== '' ) {
		return $matched;
	}
	if ( isset( $m[2] ) && $m[2] !== '' ) {
		return $matched;
	}

	# Free external link
	if ( isset( $m[3] ) && $m[3] !== '' ) {
		return $this->makeFreeExternalLink( $matched, strlen( $m[4] ) );
	}

	# RFC or PMID
	if ( isset( $m[5] ) && $m[5] !== '' ) {
		if ( substr( $matched, 0, 3 ) === 'RFC' ) {
			if ( !$this->mOptions->getMagicRFCLinks() ) {
				return $matched;
			}
			[ $keyword, $urlmsg, $cssClass, $trackingCat ] =
				[ 'RFC', 'rfcurl', 'mw-magiclink-rfc', 'magiclink-tracking-rfc' ];
		} elseif ( substr( $matched, 0, 4 ) === 'PMID' ) {
			if ( !$this->mOptions->getMagicPMIDLinks() ) {
				return $matched;
			}
			[ $keyword, $urlmsg, $cssClass, $trackingCat ] =
				[ 'PMID', 'pubmedurl', 'mw-magiclink-pmid', 'magiclink-tracking-pmid' ];
		} else {
			// Should never happen
			throw new UnexpectedValueException( __METHOD__ . ': unrecognised match type "' .
				substr( $m[0], 0, 20 ) . '"' );
		}
		$id = $m[5];
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		$this->addTrackingCategory( $trackingCat );
		return $this->getLinkRenderer()->makeExternalLink(
			$url,
			"{$keyword} {$id}",
			$this->getTitle(),
			$cssClass,
			[]
		);
	}

	# ISBN: only linked when the option is on; otherwise leave the text alone
	if ( !isset( $m[6] ) || $m[6] === '' || !$this->mOptions->getMagicISBNLinks() ) {
		return $matched;
	}

	// Display form: every kind of non-newline space becomes a plain space
	$isbn = preg_replace( '/' . self::SPACE_NOT_NL . '/', ' ', $m[6] );
	// Lookup form: no separators, upper-case check digit
	$num = strtr( $isbn, [
		'-' => '',
		' ' => '',
		'x' => 'X',
	] );
	$this->addTrackingCategory( 'magiclink-tracking-isbn' );
	return $this->getLinkRenderer()->makeKnownLink(
		SpecialPage::getTitleFor( 'Booksources', $num ),
		"ISBN $isbn",
		[
			'class' => 'internal mw-magiclink-isbn',
			'title' => false // suppress title attribute
		]
	);
}
1890
/**
 * Turn a free (unbracketed) URL found in the text into a link or, if
 * maybeMakeExternalImage() accepts it, an external image.
 *
 * Escaped angle brackets / non-breaking spaces and trailing punctuation are
 * split off the URL first and appended after the generated HTML.
 *
 * @param string $url The matched URL text
 * @param int $numPostProto Length of the part of the URL after the protocol
 * @return string HTML, or the original text if nothing but the protocol
 *   would remain after the trail has been split off
 */
private function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# '<' and '>' (which were escaped by internalRemoveHtmlTags()) must not
	# be part of a URL, per RFC 2396. &nbsp; ends a URL as well (bug T84937).
	$hit = [];
	$hasTerminator = preg_match(
		'/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
		$url,
		$hit,
		PREG_OFFSET_CAPTURE
	);
	if ( $hasTerminator ) {
		$cutAt = $hit[0][1];
		$trail = substr( $url, $cutAt );
		$url = substr( $url, 0, $cutAt );
	}

	# Characters treated as trailing punctuation. Note that in a
	# single-quoted string "\." is a literal backslash followed by a dot,
	# so the backslash is part of this set too.
	$punctuation = ',;\.:!?';
	# Without a left bracket in the URL, right brackets are fair game too
	if ( strpos( $url, '(' ) === false ) {
		$punctuation .= ')';
	}

	$reversed = strrev( $url );
	$trailLen = strspn( $reversed, $punctuation );
	# Do not break a trailing HTML entity by moving its ';' into the trail.
	# Hot code: substr_compare avoids creating a string for the comparison,
	# and matching the reversed string at a fixed offset avoids a slow
	# $-anchored match. Un-reversed regexp: /&([a-z]+|#x[\da-f]+|#\d+)$/i
	if ( $trailLen
		&& substr_compare( $url, ";", -$trailLen, 1 ) === 0
		&& preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $reversed, $hit, 0, $trailLen )
	) {
		$trailLen--;
	}
	if ( $trailLen ) {
		$trail = substr( $url, -$trailLen ) . $trail;
		$url = substr( $url, 0, -$trailLen );
	}

	# Make sure a real URL is left after trail removal, not a lone protocol
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# External image, or plain link?
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html === false ) {
		$html = $this->getLinkRenderer()->makeExternalLink(
			$url,
			$this->getTargetLanguageConverter()->markNoConversion( $url ),
			$this->getTitle(),
			'free',
			$this->getExternalLinkAttribs( $url )
		);
		# Only links (not images) are registered in the output object
		$this->mOutput->addExternalLink( $url );
	}
	return $html . $trail;
}
1968
/**
 * Convert "== heading ==" style lines into <h1>..<h6> elements.
 *
 * Levels are processed from 6 down to 1 so that the longest run of equals
 * signs is consumed first.
 *
 * @param string $text
 * @return string
 */
private function handleHeadings( $text ) {
	foreach ( range( 6, 1 ) as $level ) {
		$marker = str_repeat( '=', $level );
		// Only non-newline whitespace is trimmed around the heading text;
		// using \s* there would break "==\n===\n" (parsed as <h2>=</h2>).
		$pattern = "/^(?:$marker)[ \\t]*(.+?)[ \\t]*(?:$marker)\\s*$/m";
		$replacement = "<h$level>\\1</h$level>";
		$text = preg_replace( $pattern, $replacement, $text );
	}
	return $text;
}
1984
/**
 * Apply doQuotes() to every line of the text.
 *
 * @param string $text
 * @return string The converted lines, joined with newlines again
 */
private function handleAllQuotes( $text ) {
	$converted = [];
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$converted[] = $this->doQuotes( $line );
	}
	// Joining with "\n" gives the same result as appending "\n" after every
	// line and then dropping the final one.
	return implode( "\n", $converted );
}
2001
/**
 * Convert the apostrophe markup ('' italics, ''' bold, ''''' both) on a
 * single line into <i> and <b> elements.
 *
 * Works in three steps: normalise runs of 4 or more than 5 apostrophes,
 * rebalance when both the bold and the italic count are odd, then emit tags
 * with a small state machine. Any element still open at the end of the line
 * is closed.
 *
 * @param string $text One line of text
 * @return string The line with apostrophe markup replaced by HTML tags;
 *   the input is returned unchanged when it contains no run of two or more
 *   apostrophes
 */
public function doQuotes( $text ) {
	// Even indexes hold plain text, odd indexes hold the apostrophe runs
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (T15227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				// Last and second-to-last character of the preceding text
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} elseif ( $firstmultiletterword == -1 ) {
					$firstmultiletterword = $i;
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that is directly preceded by
			// a space.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state is one of '', 'i', 'b', 'ib', 'bi' (tags in opening order) or
	// 'both' (a ''''' was seen and we do not yet know which tag closes
	// first, so the text is held back in $buffer).
	$output = '';
	$buffer = '';
	$state = '';
	$i = 0;
	foreach ( $arr as $r ) {
		if ( ( $i % 2 ) == 0 ) {
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
		} else {
			$thislen = strlen( $r );
			if ( $thislen == 2 ) {
				// two quotes - open or close italics
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen == 3 ) {
				// three quotes - open or close bold
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen == 5 ) {
				// five quotes - open or close both separately
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		$i++;
	}
	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: a plain truthiness test would treat a
	// buffer of "0" as empty and silently drop that text from the output.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
2185
/**
 * Replace bracketed external links ("[url text]") with HTML links.
 *
 * The text is split with $this->mExtLinkBracketedRegex; after the leading
 * plain text, the pieces come in groups of four: URL, protocol, link text
 * and the text that follows the link.
 *
 * @param string $text
 * @return string
 * @throws RuntimeException If preg_split() fails
 */
private function handleExternalLinks( $text ) {
	$bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	// @phan-suppress-next-line PhanTypeComparisonFromArray See phan issue #3161
	if ( $bits === false ) {
		throw new RuntimeException( "PCRE failure" );
	}
	// Everything before the first link
	$s = array_shift( $bits );

	for ( $pos = 0, $total = count( $bits ); $pos < $total; $pos += 4 ) {
		$url = $bits[$pos];
		// $bits[$pos + 1] is the protocol, which is not needed here
		$label = $bits[$pos + 2];
		$trail = $bits[$pos + 3];

		# '<' and '>' (which were escaped by internalRemoveHtmlTags()) must
		# not be part of a URL, per RFC 2396: move them to the link text.
		$hit = [];
		if ( preg_match( '/&(lt|gt);/', $url, $hit, PREG_OFFSET_CAPTURE ) ) {
			$cutAt = $hit[0][1];
			$label = substr( $url, $cutAt ) . ' ' . $label;
			$url = substr( $url, 0, $cutAt );
		}

		# A link text that is an image URL is replaced by an <img> tag.
		# This happened by accident in the original parser, but some
		# people used it extensively.
		$img = $this->maybeMakeExternalImage( $label );
		if ( $img !== false ) {
			$label = $img;
		}

		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber
			$langObj = $this->getTargetLanguage();
			$label = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']';
			# Link type is used for CSS
			$linktype = 'autonumber';
			$dtrail = '';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			$linktype = 'text';
			# Check for trail
			[ $dtrail, $trail ] = Linker::splitTrail( $trail );
		}

		// Excluding protocol-relative URLs may avoid many false positives.
		$absoluteProtocol = '/^(?:' . $this->urlUtils->validAbsoluteProtocols() . ')/';
		if ( preg_match( $absoluteProtocol, $label ) ) {
			$label = $this->getTargetLanguageConverter()->markNoConversion( $label );
		}

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$s .= $this->getLinkRenderer()->makeExternalLink(
			$url,
			// @phan-suppress-next-line SecurityCheck-XSS
			new HtmlArmor( $label ),
			$this->getTitle(),
			$linktype,
			$this->getExternalLinkAttribs( $url )
		) . $dtrail . $trail;

		# Register link in the output object.
		$this->mOutput->addExternalLink( $url );
	}

	// @phan-suppress-next-line PhanTypeMismatchReturnNullable False positive from array_shift
	return $s;
}
2270
/**
 * Work out the value of the rel attribute for an external link, based on
 * the NoFollowLinks, NoFollowNsExceptions and NoFollowDomainExceptions
 * settings.
 *
 * @param string|false $url URL of the link, checked against the domain
 *   exception list
 * @param LinkTarget|null $title Page the link is on; its namespace is
 *   checked against the namespace exception list. When omitted, no
 *   namespace exception applies.
 * @return string|null 'nofollow', or null if no rel value is needed
 */
public static function getExternalLinkRel( $url = false, ?LinkTarget $title = null ) {
	$mainConfig = MediaWikiServices::getInstance()->getMainConfig();
	$noFollowLinks = $mainConfig->get( MainConfigNames::NoFollowLinks );
	$noFollowNsExceptions = $mainConfig->get( MainConfigNames::NoFollowNsExceptions );
	$noFollowDomainExceptions = $mainConfig->get( MainConfigNames::NoFollowDomainExceptions );
	$ns = $title ? $title->getNamespace() : false;
	// Without a title there is no namespace to exempt. The explicit check
	// matters because in_array() compares loosely here and false == 0, so
	// a missing title would otherwise match namespace 0 in the list.
	$nsIsExempt = $ns !== false && in_array( $ns, $noFollowNsExceptions );
	if ( $noFollowLinks && !$nsIsExempt
		&& !wfMatchesDomainList( $url, $noFollowDomainExceptions )
	) {
		return 'nofollow';
	}
	return null;
}
2294
/**
 * Build the extra HTML attributes (target and rel) for an external link.
 *
 * @param string $url URL of the link, used to decide on nofollow
 * @return array Attribute name => value; may contain 'target' and 'rel'
 */
public function getExternalLinkAttribs( $url ) {
	$rel = self::getExternalLinkRel( $url, $this->getTitle() ) ?? '';
	$target = $this->mOptions->getExternalLinkTarget();

	$attribs = [];
	if ( $target ) {
		$attribs['target'] = $target;
		$staysInContext = in_array( $target, [ '_self', '_parent', '_top' ] );
		if ( !$staysInContext ) {
			// T133507. New windows can navigate parent cross-origin.
			// noreferrer is included because of lacking browser support
			// for noopener; eventually it should be removed.
			$rel = ( $rel === '' ? '' : $rel . ' ' ) . 'noreferrer noopener';
		}
	}

	if ( $rel !== '' ) {
		$attribs['rel'] = $rel;
	}
	return $attribs;
}
2328
/**
 * Bring a URL into a normalised percent-encoded form.
 *
 * Unsafe characters are percent-encoded first. The URL is then cut up from
 * right to left (fragment, query, path, scheme + host) and each piece is
 * passed through normalizeUrlComponent() with its own list of characters
 * that must stay encoded. Dot segments are removed from the path, and the
 * brackets of a valid IPv6 host are restored at the end.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# Test for RFC 3986 IPv6 syntax
	$scheme = '[a-z][a-z0-9+.-]*:';
	$userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
	$ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';
	if ( preg_match( "<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $m ) &&
		IPUtils::isValid( rawurldecode( $m[1] ) )
	) {
		// Remember the decoded address; it is needed for the fix-up below
		$isIPv6 = rawurldecode( $m[1] );
	} else {
		$isIPv6 = false;
	}

	# Make sure unsafe characters are encoded
	# (this also encodes the square brackets of an IPv6 host)
	$url = preg_replace_callback(
		'/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]+/',
		static fn ( $m ) => rawurlencode( $m[0] ),
		$url
	);

	// $ret is assembled back to front; $end marks the start of what has
	// already been consumed from the right-hand side of $url.
	$ret = '';
	$end = strlen( $url );

	# Fragment part - 'fragment'
	$start = strpos( $url, '#' );
	if ( $start !== false && $start < $end ) {
		$ret = self::normalizeUrlComponent(
			substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}' ) . $ret;
		$end = $start;
	}

	# Query part - 'query' minus &=+;
	$start = strpos( $url, '?' );
	if ( $start !== false && $start < $end ) {
		$ret = self::normalizeUrlComponent(
			substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}&=+;' ) . $ret;
		$end = $start;
	}

	# Path part - 'pchar', remove dot segments
	# (find first '/' after the optional '//' after the scheme)
	$start = strpos( $url, '//' );
	$start = strpos( $url, '/', $start === false ? 0 : $start + 2 );
	if ( $start !== false && $start < $end ) {
		$ret = UrlUtils::removeDotSegments( self::normalizeUrlComponent(
			substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}/?' ) ) . $ret;
		$end = $start;
	}

	# Scheme and host part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$ret = self::normalizeUrlComponent(
		substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;

	# Fix IPv6 syntax
	# (turn the encoded %5B...%5D around the host back into literal brackets)
	if ( $isIPv6 !== false ) {
		$ipv6Host = "%5B({$isIPv6})%5D";
		$ret = preg_replace(
			"<^((?:{$scheme})?//(?:{$userinfo}@)?){$ipv6Host}(?=[:/?#]|$)>i",
			"$1[$2]",
			$ret
		);
	}

	return $ret;
}
2405
/**
 * Normalise the percent-escapes in one component of a URL.
 *
 * An escape is decoded when it stands for a character with a byte value
 * between 33 and 126 that is not listed in $unsafe. Every other escape is
 * kept, with its hex digits upper-cased.
 *
 * @param string $component The URL component to process
 * @param string $unsafe Characters that must remain percent-encoded
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		static function ( $hit ) use ( $unsafe ) {
			$escaped = $hit[0];
			$decoded = urldecode( $escaped );
			$code = ord( $decoded );
			$isVisibleAscii = $code > 32 && $code < 127;
			if ( !$isVisibleAscii || strpos( $unsafe, $decoded ) !== false ) {
				# Leave it escaped, but use uppercase for a-f
				return strtoupper( $escaped );
			}
			# Safe to unescape
			return $decoded;
		},
		$component
	);
}
2420
/**
 * Render a URL as an external image if the parser options permit it.
 *
 * An image is produced when the URL matches EXT_IMAGE_REGEX and either
 * external images are allowed in general or the URL starts with one of the
 * allowed prefixes. Failing that, if the image whitelist is enabled, the URL
 * is tested against the entries of the 'external_image_whitelist' message.
 *
 * @param string $url
 * @return string|false Image HTML, or false if the URL is not rendered as
 *   an image
 */
private function maybeMakeExternalImage( $url ) {
	$allowedFrom = $this->mOptions->getAllowExternalImagesFrom();

	# The option is either a single prefix or an array of prefixes; an
	# empty/falsy value means there are no prefix-based exceptions.
	$prefixMatched = false;
	if ( $allowedFrom ) {
		$prefixes = is_array( $allowedFrom ) ? $allowedFrom : [ $allowedFrom ];
		foreach ( $prefixes as $prefix ) {
			if ( strpos( $url, $prefix ) === 0 ) {
				$prefixMatched = true;
				break;
			}
		}
	}

	$text = false;
	if ( ( $this->mOptions->getAllowExternalImages() || $prefixMatched )
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		# Image found
		$text = Linker::makeExternalImage( $url );
	}

	if ( $text
		|| !$this->mOptions->getEnableImageWhitelist()
		|| !preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		return $text;
	}

	$whitelist = explode(
		"\n",
		wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
	);
	foreach ( $whitelist as $entry ) {
		# Ignore blank entries and comments
		if ( $entry === '' || strpos( $entry, '#' ) === 0 ) {
			continue;
		}
		# Each entry is a regex fragment: escape the delimiter and match
		# case-insensitively
		// @phan-suppress-next-line SecurityCheck-ReDoS preg_quote is not wanted here
		if ( preg_match( '/' . str_replace( '/', '\\/', $entry ) . '/i', $url ) ) {
			# Image matches a whitelist entry
			$text = Linker::makeExternalImage( $url );
			break;
		}
	}
	return $text;
}
2479
/**
 * Process the internal ("[[...]]") links in the text.
 *
 * The work is done by handleInternalLinks2(), which takes the text by
 * reference and rewrites it in place. Whatever it returns is merged into
 * $this->mLinkHolders.
 *
 * @param string $text
 * @return string The text as rewritten by handleInternalLinks2()
 */
private function handleInternalLinks( $text ) {
	// $text is modified by this call (by-reference parameter)
	$this->mLinkHolders->merge( $this->handleInternalLinks2( $text ) );
	return $text;
}
2491
2497 private function handleInternalLinks2( &$s ) {
2498 static $tc = false, $e1, $e1_img;
2499 # the % is needed to support urlencoded titles as well
2500 if ( !$tc ) {
2501 $tc = Title::legalChars() . '#%';
2502 # Match a link having the form [[namespace:link|alternate]]trail
2503 $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
2504 # Match cases where there is no "]]", which might still be images
2505 $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
2506 }
2507
2508 $holders = new LinkHolderArray(
2509 $this,
2510 $this->getContentLanguageConverter(),
2511 $this->getHookContainer() );
2512
2513 # split the entire text string on occurrences of [[
2514 $a = StringUtils::explode( '[[', ' ' . $s );
2515 # get the first element (all text up to first [[), and remove the space we added
2516 $s = $a->current();
2517 $a->next();
2518 $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
2519 $s = substr( $s, 1 );
2520
2521 $nottalk = !$this->getTitle()->isTalkPage();
2522
2523 $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
2524 $e2 = null;
2525 if ( $useLinkPrefixExtension ) {
2526 # Match the end of a line for a word that's not followed by whitespace,
2527 # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
2528 $charset = $this->contLang->linkPrefixCharset();
2529 $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
2530 $m = [];
2531 if ( preg_match( $e2, $s, $m ) ) {
2532 $first_prefix = $m[2];
2533 } else {
2534 $first_prefix = false;
2535 }
2536 $prefix = false;
2537 } else {
2538 $first_prefix = false;
2539 $prefix = '';
2540 }
2541
2542 # Some namespaces don't allow subpages
2543 $useSubpages = $this->nsInfo->hasSubpages(
2544 $this->getTitle()->getNamespace()
2545 );
2546
2547 # Loop for each link
2548 for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
2549 # Check for excessive memory usage
2550 if ( $holders->isBig() ) {
2551 # Too big
2552 # Do the existence check, replace the link holders and clear the array
2553 $holders->replace( $s );
2554 $holders->clear();
2555 }
2556
2557 if ( $useLinkPrefixExtension ) {
2558 // @phan-suppress-next-line PhanTypeMismatchArgumentNullableInternal $e2 is set under this condition
2559 if ( preg_match( $e2, $s, $m ) ) {
2560 [ , $s, $prefix ] = $m;
2561 } else {
2562 $prefix = '';
2563 }
2564 # first link
2565 if ( $first_prefix ) {
2566 $prefix = $first_prefix;
2567 $first_prefix = false;
2568 }
2569 }
2570
2571 $might_be_img = false;
2572
2573 if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
2574 $text = $m[2];
2575 # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
2576 # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
2577 # the real problem is with the $e1 regex
2578 # See T1500.
2579 # Still some problems for cases where the ] is meant to be outside punctuation,
2580 # and no image is in sight. See T4095.
2581 if ( $text !== ''
2582 && substr( $m[3], 0, 1 ) === ']'
2583 && strpos( $text, '[' ) !== false
2584 ) {
2585 $text .= ']'; # so that handleExternalLinks($text) works later
2586 $m[3] = substr( $m[3], 1 );
2587 }
2588 # fix up urlencoded title texts
2589 if ( strpos( $m[1], '%' ) !== false ) {
2590 # Should anchors '#' also be rejected?
2591 $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2592 }
2593 $trail = $m[3];
2594 } elseif ( preg_match( $e1_img, $line, $m ) ) {
2595 # Invalid, but might be an image with a link in its caption
2596 $might_be_img = true;
2597 $text = $m[2];
2598 if ( strpos( $m[1], '%' ) !== false ) {
2599 $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2600 }
2601 $trail = "";
2602 } else { # Invalid form; output directly
2603 $s .= $prefix . '[[' . $line;
2604 continue;
2605 }
2606
2607 // @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset preg_match success when reached here
2608 $origLink = ltrim( $m[1], ' ' );
2609
2610 # Don't allow internal links to pages containing
2611 # PROTO: where PROTO is a valid URL protocol; these
2612 # should be external links.
2613 if ( preg_match( '/^(?i:' . $this->urlUtils->validProtocols() . ')/', $origLink ) ) {
2614 $s .= $prefix . '[[' . $line;
2615 continue;
2616 }
2617
2618 # Make subpage if necessary
2619 if ( $useSubpages ) {
2620 $link = Linker::normalizeSubpageLink(
2621 $this->getTitle(), $origLink, $text
2622 );
2623 } else {
2624 $link = $origLink;
2625 }
2626
2627 // \x7f isn't a default legal title char, so most likely strip
2628 // markers will force us into the "invalid form" path above. But,
2629 // just in case, let's assert that xmlish tags aren't valid in
2630 // the title position.
2631 $unstrip = $this->mStripState->killMarkers( $link );
2632 $noMarkers = ( $unstrip === $link );
2633
2634 $nt = $noMarkers ? Title::newFromText( $link ) : null;
2635 if ( $nt === null ) {
2636 $s .= $prefix . '[[' . $line;
2637 continue;
2638 }
2639
2640 $ns = $nt->getNamespace();
2641 $iw = $nt->getInterwiki();
2642
2643 $noforce = ( substr( $origLink, 0, 1 ) !== ':' );
2644
2645 if ( $might_be_img ) { # if this is actually an invalid link
2646 if ( $ns === NS_FILE && $noforce ) { # but might be an image
2647 $found = false;
2648 while ( true ) {
2649 # look at the next 'line' to see if we can close it there
2650 $a->next();
2651 $next_line = $a->current();
2652 if ( $next_line === false || $next_line === null ) {
2653 break;
2654 }
2655 $m = explode( ']]', $next_line, 3 );
2656 if ( count( $m ) == 3 ) {
2657 # the first ]] closes the inner link, the second the image
2658 $found = true;
2659 $text .= "[[{$m[0]}]]{$m[1]}";
2660 $trail = $m[2];
2661 break;
2662 } elseif ( count( $m ) == 2 ) {
2663 # if there's exactly one ]] that's fine, we'll keep looking
2664 $text .= "[[{$m[0]}]]{$m[1]}";
2665 } else {
2666 # if $next_line is invalid too, we need look no further
2667 $text .= '[[' . $next_line;
2668 break;
2669 }
2670 }
2671 if ( !$found ) {
2672 # we couldn't find the end of this imageLink, so output it raw
2673 # but don't ignore what might be perfectly normal links in the text we've examined
2674 $holders->merge( $this->handleInternalLinks2( $text ) );
2675 $s .= "{$prefix}[[$link|$text";
2676 # note: no $trail, because without an end, there *is* no trail
2677 continue;
2678 }
2679 } else { # it's not an image, so output it raw
2680 $s .= "{$prefix}[[$link|$text";
2681 # note: no $trail, because without an end, there *is* no trail
2682 continue;
2683 }
2684 }
2685
2686 $wasblank = ( $text == '' );
2687 if ( $wasblank ) {
2688 $text = $link;
2689 if ( !$noforce ) {
2690 # Strip off leading ':'
2691 $text = substr( $text, 1 );
2692 }
2693 } else {
2694 # T6598 madness. Handle the quotes only if they come from the alternate part
2695 # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
2696 # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
2697 # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
2698 $text = $this->doQuotes( $text );
2699 }
2700
2701 # Link not escaped by : , create the various objects
2702 if ( $noforce && !$nt->wasLocalInterwiki() ) {
2703 # Interwikis
2704 if (
2705 $iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
2706 $this->languageNameUtils->getLanguageName(
2707 $iw,
2708 LanguageNameUtils::AUTONYMS,
2709 LanguageNameUtils::DEFINED
2710 )
2711 || in_array( $iw, $this->svcOptions->get( MainConfigNames::ExtraInterlanguageLinkPrefixes ) )
2712 )
2713 ) {
2714 # T26502: filter duplicates
2715 if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
2716 $this->mLangLinkLanguages[$iw] = true;
2717 $this->mOutput->addLanguageLink( $nt );
2718 }
2719
2724 $s = preg_replace( '/\n\s*$/', '', $s . $prefix ) . $trail;
2725 continue;
2726 }
2727
2728 if ( $ns === NS_FILE ) {
2729 if ( $wasblank ) {
2730 # if no parameters were passed, $text
2731 # becomes something like "File:Foo.png",
2732 # which we don't want to pass on to the
2733 # image generator
2734 $text = '';
2735 } else {
2736 # recursively parse links inside the image caption
2737 # actually, this will parse them in any other parameters, too,
2738 # but it might be hard to fix that, and it doesn't matter ATM
2739 $text = $this->handleExternalLinks( $text );
2740 $holders->merge( $this->handleInternalLinks2( $text ) );
2741 }
2742 # cloak any absolute URLs inside the image markup, so handleExternalLinks() won't touch them
2743 $s .= $prefix . $this->armorLinks(
2744 $this->makeImage( $nt, $text, $holders ) ) . $trail;
2745 continue;
2746 } elseif ( $ns === NS_CATEGORY ) {
2747 # Strip newlines from the left hand context of Category
2748 # links.
2749 # See T2087, T87753, T174639, T359886
2750 $s = preg_replace( '/\n\s*$/', '', $s . $prefix ) . $trail;
2751
2752 $sortkey = ''; // filled in by CategoryLinksTable
2753 if ( !$wasblank ) {
2754 $sortkey = $text;
2755 }
2756 $this->mOutput->addCategory( $nt, $sortkey );
2757
2758 continue;
2759 }
2760 }
2761
2762 # Self-link checking. For some languages, variants of the title are checked in
2763 # LinkHolderArray::doVariants() to allow batching the existence checks necessary
2764 # for linking to a different variant.
2765 if ( $ns !== NS_SPECIAL && $nt->equals( $this->getTitle() ) ) {
2766 $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail, '',
2767 Sanitizer::escapeIdForLink( $nt->getFragment() ) );
2768 continue;
2769 }
2770
2771 # NS_MEDIA is a pseudo-namespace for linking directly to a file
2772 # @todo FIXME: Should do batch file existence checks, see comment below
2773 if ( $ns === NS_MEDIA ) {
2774 # Give extensions a chance to select the file revision for us
2775 $options = [];
2776 $descQuery = false;
2777 $this->hookRunner->onBeforeParserFetchFileAndTitle(
2778 // @phan-suppress-next-line PhanTypeMismatchArgument Type mismatch on pass-by-ref args
2779 $this, $nt, $options, $descQuery
2780 );
2781 # Fetch and register the file (file title may be different via hooks)
2782 [ $file, $nt ] = $this->fetchFileAndTitle( $nt, $options );
2783 # Cloak with NOPARSE to avoid replacement in handleExternalLinks
2784 $s .= $prefix . $this->armorLinks(
2785 Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
2786 continue;
2787 }
2788
2789 # Some titles, such as valid special pages or files in foreign repos, should
2790 # be shown as bluelinks even though they're not included in the page table
2791 # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
2792 # batch file existence checks for NS_FILE and NS_MEDIA
2793 if ( $iw == '' && $nt->isAlwaysKnown() ) {
2794 $this->mOutput->addLink( $nt );
2795 $s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
2796 } else {
2797 # Links will be added to the output link list after checking
2798 $s .= $holders->makeHolder( $nt, $text, $trail, $prefix );
2799 }
2800 }
2801 return $holders;
2802 }
2803
/**
 * Build the HTML for a link to a target that is treated as known, and
 * armor it so that URLs inside it are not picked up by later passes.
 *
 * The trail is split by Linker::splitTrail(); the first part is placed
 * inside the link text, the remainder is appended after the link.
 *
 * @param LinkTarget $nt Link target
 * @param string $text Link text, used as-is (wrapped in HtmlArmor); when
 *   empty, the HTML-escaped prefixed title text is used instead
 * @param string $trail Text following the link
 * @param string $prefix Text prepended inside the link
 * @return string Armored link followed by the unused part of the trail
 */
private function makeKnownLinkHolder( LinkTarget $nt, $text = '', $trail = '', $prefix = '' ) {
	[ $inside, $remainder ] = Linker::splitTrail( $trail );

	$label = $text;
	if ( $label == '' ) {
		# No explicit text: fall back to the escaped title
		$label = htmlspecialchars( $this->titleFormatter->getPrefixedText( $nt ) );
	}

	$renderer = $this->getLinkRenderer();
	$html = $renderer->makeKnownLink( $nt, new HtmlArmor( "$prefix$label$inside" ) );

	return $this->armorLinks( $html ) . $remainder;
}
2830
/**
 * Insert a NOPARSE marker in front of every valid URL protocol found at a
 * word boundary in $text (matched case-insensitively), so that
 * handleExternalLinks() will not touch those URLs.
 *
 * @param string $text
 * @return string Result of the preg_replace() call
 */
private function armorLinks( $text ) {
	$protocols = $this->urlUtils->validProtocols();
	$pattern = '/\b((?i)' . $protocols . ')/';
	$replacement = self::MARKER_PREFIX . "NOPARSE$1";

	return preg_replace( $pattern, $replacement, $text );
}
2845
/**
 * Deprecated pass-through to BlockLevelPass::doBlockLevels().
 *
 * @deprecated since 1.35
 * @param string $text
 * @param bool $linestart Forwarded unchanged to BlockLevelPass
 * @return string Whatever BlockLevelPass::doBlockLevels() returns
 */
public function doBlockLevels( $text, $linestart ) {
	wfDeprecated( __METHOD__, '1.35' );

	$processed = BlockLevelPass::doBlockLevels( $text, $linestart );
	return $processed;
}
2859
/**
 * Return the value of a magic variable, caching it in $this->mVarCache.
 *
 * Core variables are resolved by CoreMagicVariables::expand(); anything it
 * does not recognise (null result) is handed to the
 * ParserGetVariableValueSwitch hook.
 *
 * @param string $index Magic variable identifier
 * @param PPFrame|false $frame Only forwarded to the hook
 * @return string|null Value stored in the cache (the hook is expected to
 *   supply a string for non-core variables)
 */
private function expandMagicVariable( $index, $frame = false ) {
	# Serve repeated lookups from the per-parse cache
	if ( isset( $this->mVarCache[$index] ) ) {
		return $this->mVarCache[$index];
	}

	$timestamp = new MWTimestamp( $this->mOptions->getTimestamp() /* TS_MW */ );
	if ( $this->hookContainer->isRegistered( 'ParserGetVariableValueTs' ) ) {
		# Let the hook adjust the UNIX timestamp, then rebuild the object
		$unixTime = $timestamp->getTimestamp( TS_UNIX );
		$this->hookRunner->onParserGetVariableValueTs( $this, $unixTime );
		$timestamp = new MWTimestamp( $unixTime );
	}

	$value = CoreMagicVariables::expand(
		$this, $index, $timestamp, $this->svcOptions, $this->logger
	);

	if ( $value === null ) {
		// Not a defined core magic word
		// Don't give this hook unrestricted access to mVarCache
		$throwawayCache = [];
		$this->hookRunner->onParserGetVariableValueSwitch(
			// @phan-suppress-next-line PhanTypeMismatchArgument $value is passed as null but returned as string
			$this, $throwawayCache, $index, $value, $frame
		);
		// Cache the value returned by the hook by falling through here.
		// Assert that the hook returned a non-null value for this MV
		'@phan-var string $value';
	}

	$this->mVarCache[$index] = $value;

	return $value;
}
2905
/**
 * Populate $this->mVariables (array built from the variable IDs) and
 * $this->mSubstWords (the subst array) from the magic word factory.
 */
private function initializeVariables() {
	$factory = $this->magicWordFactory;

	$this->mVariables = $factory->newArray( $factory->getVariableIDs() );
	$this->mSubstWords = $factory->getSubstArray();
}
2916
/**
 * Run the preprocessor over $text and return the resulting object.
 *
 * @param string $text
 * @param int $flags Forwarded to Preprocessor::preprocessToObj(); callers
 *   in this class pass 0 or Preprocessor::DOM_FOR_INCLUSION
 * @return PPNode Result of preprocessToObj() — presumably a PPNode; confirm
 *   against the Preprocessor interface
 */
public function preprocessToDom( $text, $flags = 0 ) {
	$preprocessor = $this->getPreprocessor();

	return $preprocessor->preprocessToObj( $text, $flags );
}
2938
/**
 * Preprocess $text and expand the result in a frame.
 *
 * Empty text, and text longer than the configured maximum include size, is
 * returned unchanged.
 *
 * @param string $text
 * @param PPFrame|array|false $frame Frame to expand in. False creates a new
 *   top-level frame. Passing anything that is not a PPFrame is deprecated
 *   since 1.43; such a value is wrapped via newCustomFrame().
 * @param bool $argsOnly When true, expand with PPFrame::NO_TEMPLATES
 * @return string Expanded text
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	# Is there any text? Also, Prevent too big inclusions!
	$textSize = strlen( $text );
	if ( $textSize < 1 || $textSize > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	if ( $frame === false ) {
		$frame = $this->getPreprocessor()->newFrame();
	} elseif ( !( $frame instanceof PPFrame ) ) {
		# The opening of this call was missing, leaving dangling arguments
		# and an unmatched ");" behind.
		wfDeprecated(
			__METHOD__ . " called using plain parameters instead of " .
			"a PPFrame instance. Creating custom frame.",
			'1.43'
		);
		$frame = $this->getPreprocessor()->newCustomFrame( $frame );
	}

	$dom = $this->preprocessToDom( $text );
	$flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;

	return $frame->expand( $dom, $flags );
}
2984
/**
 * Record that a parser limit was hit: add the "<type>-warning" message to
 * the output and the "<type>-category" tracking category.
 *
 * @param string $limitationType Prefix of the message and category keys
 * @param string|int $current Passed to the message as a numeric parameter
 * @param string|int $max Passed to the message as a numeric parameter
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	$warningKey = "$limitationType-warning";
	$categoryKey = "$limitationType-category";

	# does no harm if $current and $max are present but are unnecessary for the message
	# Not doing ->inLanguage( $this->mOptions->getUserLangObj() ), since this is shown
	# only during preview, and that would split the parser cache unnecessarily.
	$this->mOutput->addWarningMsg(
		$warningKey,
		Message::numParam( $current ),
		Message::numParam( $max )
	);
	$this->addTrackingCategory( $categoryKey );
}
3023
/**
 * Expand a single double-brace construct: a subst'ed literal, a magic
 * variable, a parser function call, a special page inclusion, a template
 * transclusion or an interwiki transclusion.
 *
 * The stages run in that order and each one is skipped once an earlier
 * stage has set $found. If nothing matches, the original wikitext is
 * rebuilt and returned unexpanded.
 *
 * @param array $piece Entries read here: 'title' (node expanded to obtain
 *   the title text), 'parts' (argument node list) and 'lineStart'
 * @param PPFrame $frame Frame in which the construct appears
 * @return array Either [ 'object' => ... ] (value still needing expansion in
 *   the current frame, or the recovered source) or [ 'text' => ... ]
 */
public function braceSubstitution( array $piece, PPFrame $frame ) {
	// Flags

	// $text has been filled
	$found = false;
	$text = '';
	// wiki markup in $text should be escaped
	$nowiki = false;
	// $text is HTML, armour it against wikitext transformation
	$isHTML = false;
	// Force interwiki transclusion to be done in raw mode not rendered
	$forceRawInterwiki = false;
	// $text is a DOM node needing expansion in a child frame
	$isChildObj = false;
	// $text is a DOM node needing expansion in the current frame
	$isLocalObj = false;

	# Title object, where $text came from
	$title = false;

	# $part1 is the bit before the first |, and must contain only title characters.
	# Various prefixes will be stripped from it later.
	$titleWithSpaces = $frame->expand( $piece['title'] );
	$part1 = trim( $titleWithSpaces );
	$titleText = false;

	# Original title text preserved for various purposes
	$originalTitle = $part1;

	# $args is a list of argument nodes, starting from index 0, not including $part1
	$args = $piece['parts'];

	$profileSection = null; // profile templates

	$sawDeprecatedTemplateEquals = false; // T91154

	# SUBST
	// @phan-suppress-next-line PhanImpossibleCondition
	if ( !$found ) {
		$substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );
		$part1 = trim( $part1 );

		# Possibilities for substMatch: "subst", "safesubst" or FALSE
		# Decide whether to expand template or keep wikitext as-is.
		if ( $this->ot['wiki'] ) {
			# In PST: literal with no prefix, expand with subst: or safesubst:
			$literal = ( $substMatch === false );
		} else {
			# Not in PST: literal with plain subst:, expand with safesubst: or no prefix
			$literal = ( $substMatch == 'subst' );
		}
		if ( $literal ) {
			$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
			$isLocalObj = true;
			$found = true;
		}
	}

	# Variables
	if ( !$found && $args->getLength() == 0 ) {
		$id = $this->mVariables->matchStartToEnd( $part1 );
		if ( $id !== false ) {
			if ( strpos( $part1, ':' ) !== false ) {
				# The opening of this call was missing, leaving dangling
				# arguments and an unmatched ");" behind.
				wfDeprecatedMsg(
					'Registering a magic variable with a name including a colon',
					'1.39', false, false
				);
			}
			$text = $this->expandMagicVariable( $id, $frame );
			$found = true;
		}
	}

	# MSG, MSGNW and RAW
	if ( !$found ) {
		# Check for MSGNW:
		$mwMsgnw = $this->magicWordFactory->get( 'msgnw' );
		if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
			$nowiki = true;
		} else {
			# Remove obsolete MSG:
			$mwMsg = $this->magicWordFactory->get( 'msg' );
			$mwMsg->matchStartAndRemove( $part1 );
		}

		# Check for RAW:
		$mwRaw = $this->magicWordFactory->get( 'raw' );
		if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
			$forceRawInterwiki = true;
		}
	}

	# Parser functions
	if ( !$found ) {
		$colonPos = strpos( $part1, ':' );
		if ( $colonPos !== false ) {
			$func = substr( $part1, 0, $colonPos );
			$funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
			$argsLength = $args->getLength();
			for ( $i = 0; $i < $argsLength; $i++ ) {
				$funcArgs[] = $args->item( $i );
			}

			$result = $this->callParserFunction( $frame, $func, $funcArgs );

			// Extract any forwarded flags
			if ( isset( $result['title'] ) ) {
				$title = $result['title'];
			}
			if ( isset( $result['found'] ) ) {
				$found = $result['found'];
			}
			if ( array_key_exists( 'text', $result ) ) {
				// a string or null
				$text = $result['text'];
			}
			if ( isset( $result['nowiki'] ) ) {
				$nowiki = $result['nowiki'];
			}
			if ( isset( $result['isHTML'] ) ) {
				$isHTML = $result['isHTML'];
			}
			if ( isset( $result['forceRawInterwiki'] ) ) {
				$forceRawInterwiki = $result['forceRawInterwiki'];
			}
			if ( isset( $result['isChildObj'] ) ) {
				$isChildObj = $result['isChildObj'];
			}
			if ( isset( $result['isLocalObj'] ) ) {
				$isLocalObj = $result['isLocalObj'];
			}
		}
	}

	# Finish mangling title and then check for loops.
	# Set $title to a Title object and $titleText to the PDBK
	if ( !$found ) {
		$ns = NS_TEMPLATE;
		# Split the title into page and subpage
		$subpage = '';
		$relative = Linker::normalizeSubpageLink(
			$this->getTitle(), $part1, $subpage
		);
		if ( $part1 !== $relative ) {
			$part1 = $relative;
			$ns = $this->getTitle()->getNamespace();
		}
		$title = Title::newFromText( $part1, $ns );
		if ( $title ) {
			$titleText = $title->getPrefixedText();
			# Check for language variants if the template is not found
			if ( $this->getTargetLanguageConverter()->hasVariants() && $title->getArticleID() == 0 ) {
				$this->getTargetLanguageConverter()->findVariantLink( $part1, $title, true );
			}
			# Do recursion depth check
			$limit = $this->mOptions->getMaxTemplateDepth();
			if ( $frame->depth >= $limit ) {
				$found = true;
				$text = '<span class="error">'
					. wfMessage( 'parser-template-recursion-depth-warning' )
						->numParams( $limit )->inContentLanguage()->text()
					. '</span>';
			}
		}
	}

	# Load from database
	if ( !$found && $title ) {
		$profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
		if ( !$title->isExternal() ) {
			if ( $title->isSpecialPage()
				&& $this->mOptions->getAllowSpecialInclusion()
				&& $this->ot['html']
			) {
				$specialPage = $this->specialPageFactory->getPage( $title->getDBkey() );
				// Pass the template arguments as URL parameters.
				// "uselang" will have no effect since the Language object
				// is forced to the one defined in ParserOptions.
				$pageArgs = [];
				$argsLength = $args->getLength();
				for ( $i = 0; $i < $argsLength; $i++ ) {
					$bits = $args->item( $i )->splitArg();
					# Only named arguments are forwarded
					if ( strval( $bits['index'] ) === '' ) {
						$name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
						$value = trim( $frame->expand( $bits['value'] ) );
						$pageArgs[$name] = $value;
					}
				}

				// Create a new context to execute the special page
				$context = new RequestContext;
				$context->setTitle( $title );
				$context->setRequest( new FauxRequest( $pageArgs ) );
				if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
					$context->setUser( $this->userFactory->newFromUserIdentity( $this->getUserIdentity() ) );
				} else {
					// If this page is cached, then we better not be per user.
					$context->setUser( User::newFromName( '127.0.0.1', false ) );
				}
				$context->setLanguage( $this->mOptions->getUserLangObj() );
				$ret = $this->specialPageFactory->capturePath( $title, $context, $this->getLinkRenderer() );
				if ( $ret ) {
					$text = $context->getOutput()->getHTML();
					$this->mOutput->addOutputPageMetadata( $context->getOutput() );
					$found = true;
					$isHTML = true;
					if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
						$this->mOutput->updateRuntimeAdaptiveExpiry(
							$specialPage->maxIncludeCacheTime()
						);
					}
				}
			} elseif ( $this->nsInfo->isNonincludable( $title->getNamespace() ) ) {
				$found = false; # access denied
				$this->logger->debug(
					__METHOD__ .
					": template inclusion denied for " . $title->getPrefixedDBkey()
				);
			} else {
				[ $text, $title ] = $this->getTemplateDom( $title );
				if ( $text !== false ) {
					$found = true;
					$isChildObj = true;
					if (
						$title->getNamespace() === NS_TEMPLATE &&
						$title->getDBkey() === '=' &&
						$originalTitle === '='
					) {
						// Note that we won't get here if `=` is evaluated
						// (in the future) as a parser function, nor if
						// the Template namespace is given explicitly,
						// ie `{{Template:=}}`. Only `{{=}}` triggers.
						$sawDeprecatedTemplateEquals = true; // T91154
					}
				}
			}

			# If the title is valid but undisplayable, make a link to it
			if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
				$text = "[[:$titleText]]";
				$found = true;
			}
		} elseif ( $title->isTrans() ) {
			# Interwiki transclusion
			if ( $this->ot['html'] && !$forceRawInterwiki ) {
				$text = $this->interwikiTransclude( $title, 'render' );
				$isHTML = true;
			} else {
				$text = $this->interwikiTransclude( $title, 'raw' );
				# Preprocess it like a template
				$text = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
				$isChildObj = true;
			}
			$found = true;
		}

		# Do infinite loop check
		# This has to be done after redirect resolution to avoid infinite loops via redirects
		if ( !$frame->loopCheck( $title ) ) {
			$found = true;
			$text = '<span class="error">'
				. wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
				. '</span>';
			$this->addTrackingCategory( 'template-loop-category' );
			$this->mOutput->addWarningMsg(
				'template-loop-warning',
				Message::plaintextParam( $titleText )
			);
			$this->logger->debug( __METHOD__ . ": template loop broken at '$titleText'" );
		}
	}

	# If we haven't found text to substitute by now, we're done
	# Recover the source wikitext and return it
	if ( !$found ) {
		$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
		if ( $profileSection ) {
			$this->mProfiler->scopedProfileOut( $profileSection );
		}
		return [ 'object' => $text ];
	}

	# Expand DOM-style return values in a child frame
	if ( $isChildObj ) {
		# Clean up argument array
		$newFrame = $frame->newChild( $args, $title );

		if ( $nowiki ) {
			$text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
		} elseif ( $titleText !== false && $newFrame->isEmpty() ) {
			# Expansion is eligible for the empty-frame cache
			$text = $newFrame->cachedExpand( $titleText, $text );
		} else {
			# Uncached expansion
			$text = $newFrame->expand( $text );
		}
	}
	if ( $isLocalObj && $nowiki ) {
		$text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
		$isLocalObj = false;
	}

	if ( $profileSection ) {
		$this->mProfiler->scopedProfileOut( $profileSection );
	}
	if (
		$sawDeprecatedTemplateEquals &&
		$this->mStripState->unstripBoth( $text ) !== '='
	) {
		// T91154: {{=}} is deprecated when it doesn't expand to `=`;
		// use {{Template:=}} if you must.
		$this->addTrackingCategory( 'template-equals-category' );
		$this->mOutput->addWarningMsg( 'template-equals-warning' );
	}

	# Replace raw HTML by a placeholder
	if ( $isHTML ) {
		// @phan-suppress-next-line SecurityCheck-XSS
		$text = $this->insertStripItem( $text );
	} elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
		# Escape nowiki-style return values
		// @phan-suppress-next-line SecurityCheck-DoubleEscaped
		$text = wfEscapeWikiText( $text );
	} elseif ( is_string( $text )
		&& !$piece['lineStart']
		&& preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
	) {
		# T2529: if the template begins with a table or block-level
		# element, it should be treated as beginning a new line.
		# This behavior is somewhat controversial.
		$text = "\n" . $text;
	}

	if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
		# Error, oversize inclusion
		if ( $titleText !== false ) {
			# Make a working, properly escaped link if possible (T25588)
			$text = "[[:$titleText]]";
		} else {
			# This will probably not be a working link, but at least it may
			# provide some hint of where the problem is
			$originalTitle = preg_replace( '/^:/', '', $originalTitle );
			$text = "[[:$originalTitle]]";
		}
		$text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
			. 'post-expand include size too large -->' );
		$this->limitationWarn( 'post-expand-template-inclusion' );
	}

	# Values still needing expansion in the current frame go back as 'object'
	return $isLocalObj ? [ 'object' => $text ] : [ 'text' => $text ];
}
3400
/**
 * Look up a parser function by name, invoke its callback and normalise the
 * return value into an array of flags.
 *
 * Case-sensitive synonyms are tried first, then the content-language
 * lowercased name against the case-insensitive synonyms.
 *
 * @param PPFrame $frame Frame used to expand PPNode arguments; also handed
 *   to SFH_OBJECT_ARGS callbacks
 * @param string $function Function name as written in the wikitext
 * @param array $args Arguments; PPNode instances or plain values
 * @return array [ 'found' => false ] when the name is unknown; otherwise the
 *   callback result as an array with at least 'found' (default true) and,
 *   when provided, 'text'. If the callback sets 'noparse' to a false value,
 *   'text' is preprocessed and 'isChildObj' is set to true.
 */
public function callParserFunction( PPFrame $frame, $function, array $args = [] ) {
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		# Case sensitive functions
		$function = $this->mFunctionSynonyms[1][$function];
	} else {
		# Case insensitive functions
		$function = $this->contLang->lc( $function );
		if ( !isset( $this->mFunctionSynonyms[0][$function] ) ) {
			return [ 'found' => false ];
		}
		$function = $this->mFunctionSynonyms[0][$function];
	}

	[ $callback, $flags ] = $this->mFunctionHooks[$function];

	$callArgs = [ $this ];
	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Convert arguments to PPNodes and collect for appending to $callArgs
		$nodeArgs = [];
		foreach ( $args as $key => $arg ) {
			# The first argument and existing nodes are passed through untouched
			if ( $arg instanceof PPNode || $key === 0 ) {
				$nodeArgs[] = $arg;
			} else {
				$nodeArgs[] = $this->mPreprocessor->newPartNodeArray( [ $key => $arg ] )->item( 0 );
			}
		}

		# Add a frame parameter, and pass the arguments as an array
		$callArgs[] = $frame;
		$callArgs[] = $nodeArgs;
	} else {
		# Convert arguments to plain text and append to $callArgs
		foreach ( $args as $key => $arg ) {
			if ( $arg instanceof PPNode ) {
				$callArgs[] = trim( $frame->expand( $arg ) );
			} elseif ( is_int( $key ) && $key >= 0 ) {
				$callArgs[] = trim( $arg );
			} else {
				$callArgs[] = trim( "$key=$arg" );
			}
		}
	}

	$result = $callback( ...$callArgs );

	# The interface for function hooks allows them to return a wikitext
	# string or an array containing the string and any flags. This mungs
	# things around to match what this method should return.
	if ( is_array( $result ) ) {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		$result += [
			'found' => true,
		];
	} else {
		$result = [
			'found' => true,
			'text' => $result,
		];
	}

	$noparse = $result['noparse'] ?? true;
	$preprocessFlags = $result['preprocessFlags'] ?? 0;

	if ( !$noparse ) {
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
3499
/**
 * Get the preprocessed DOM of a template, using the per-parser DOM cache
 * and the redirect cache.
 *
 * @param LinkTarget $title Template to load
 * @return array [ DOM or false, final title ]. The first element is false
 *   when fetchTemplateAndTitle() yields no text (cached failures included).
 */
public function getTemplateDom( LinkTarget $title ) {
	$requested = $title;
	$cacheKey = CacheKeyHelper::getKeyForPage( $title );

	# Follow a redirect remembered from an earlier fetch
	if ( isset( $this->mTplRedirCache[$cacheKey] ) ) {
		[ $redirNs, $redirDbKey ] = $this->mTplRedirCache[$cacheKey];
		$title = Title::makeTitle( $redirNs, $redirDbKey );
		$cacheKey = CacheKeyHelper::getKeyForPage( $title );
	}
	if ( isset( $this->mTplDomCache[$cacheKey] ) ) {
		return [ $this->mTplDomCache[$cacheKey], $title ];
	}

	# Cache miss, go to the database
	[ $wikitext, $title ] = $this->fetchTemplateAndTitle( $title );

	if ( $wikitext === false ) {
		$this->mTplDomCache[$cacheKey] = false;
		return [ false, $title ];
	}

	$dom = $this->preprocessToDom( $wikitext, Preprocessor::DOM_FOR_INCLUSION );
	$this->mTplDomCache[$cacheKey] = $dom;

	# Remember where the requested title ended up, if it moved
	if ( !$title->isSamePageAs( $requested ) ) {
		$requestedKey = CacheKeyHelper::getKeyForPage( $requested );
		$this->mTplRedirCache[$requestedKey] = [ $title->getNamespace(), $title->getDBkey() ];
	}

	return [ $dom, $title ];
}
3540
/**
 * Fetch the current revision of a page through the callback configured in
 * ParserOptions, memoising the result per page in an LRU cache of 100
 * entries.
 *
 * @param LinkTarget $link Page to look up
 * @return RevisionRecord|null Value produced by the callback, with false
 *   normalised to null
 */
public function fetchCurrentRevisionRecordOfTitle( LinkTarget $link ) {
	$cacheKey = CacheKeyHelper::getKeyForPage( $link );
	if ( !$this->currentRevisionCache ) {
		$this->currentRevisionCache = new MapCacheLRU( 100 );
	}
	if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
		$title = Title::newFromLinkTarget( $link ); // hook signature compat
		// Defaults to Parser::statelessFetchRevisionRecord()
		$callback = $this->mOptions->getCurrentRevisionRecordCallback();
		$revisionRecord = $callback( $title, $this );
		if ( $revisionRecord === false ) {
			// Parser::statelessFetchRevisionRecord() can return false;
			// normalize it to null.
			$revisionRecord = null;
		}
		$this->currentRevisionCache->set( $cacheKey, $revisionRecord );
	}
	return $this->currentRevisionCache->get( $cacheKey );
}
3577
/**
 * Tell whether the current-revision cache already holds an entry for the
 * given page.
 *
 * @param LinkTarget $link Page to check
 * @return bool
 */
public function isCurrentRevisionOfTitleCached( LinkTarget $link ) {
	$key = CacheKeyHelper::getKeyForPage( $link );
	return (
		$this->currentRevisionCache &&
		$this->currentRevisionCache->has( $key )
	);
}
3591
/**
 * Look up the known current revision of a page via the RevisionLookup
 * service, without touching any Parser state.
 *
 * @param LinkTarget $link Page to look up
 * @param Parser|null $parser Not used by this implementation
 * @return mixed Result of RevisionLookup::getKnownCurrentRevision()
 */
public static function statelessFetchRevisionRecord( LinkTarget $link, $parser = null ) {
	// A PageIdentity (probably a Title) can be used directly.
	// XXX: use RevisionStore::getPageForLink()!
	// ...but get the info for the current revision at the same time?
	// Should RevisionStore::getKnownCurrentRevision accept a LinkTarget?
	$page = $link instanceof PageIdentity
		? $link
		: Title::newFromLinkTarget( $link );

	$revisionLookup = MediaWikiServices::getInstance()->getRevisionLookup();

	return $revisionLookup->getKnownCurrentRevision( $page );
}
3616
/**
 * Fetch template text through the callback configured in ParserOptions and
 * register every dependency it reports with the parser output.
 *
 * @param LinkTarget $link Template to fetch
 * @return array [ text (as delivered by the callback, with U+007F replaced
 *   by "?" when it is a string), final Title ]
 */
public function fetchTemplateAndTitle( LinkTarget $link ) {
	// Use Title for compatibility with callbacks and return type
	$title = Title::newFromLinkTarget( $link );

	// Defaults to Parser::statelessFetchTemplate()
	$fetch = $this->mOptions->getTemplateCallback();
	$fetched = $fetch( $title, $this );
	$revRecord = $fetched['revision-record'] ?? null;

	$text = $fetched['text'];
	if ( is_string( $text ) ) {
		// We use U+007F DELETE to distinguish strip markers from regular text
		$text = strtr( $text, "\x7f", "?" );
	}
	$finalTitle = $fetched['finalTitle'] ?? $title;

	$deps = $fetched['deps'] ?? [];
	foreach ( $deps as $dep ) {
		$this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );

		$isSelfTransclusion = $dep['title']->equals( $this->getTitle() )
			&& $revRecord instanceof RevisionRecord;
		if ( !$isSelfTransclusion ) {
			continue;
		}

		// Self-transclusion; final result may change based on the new page version
		try {
			$sha1 = $revRecord->getSha1();
		} catch ( RevisionAccessException $e ) {
			$sha1 = null;
		}
		$this->setOutputFlag( ParserOutputFlags::VARY_REVISION_SHA1, 'Self transclusion' );
		$this->getOutput()->setRevisionUsedSha1Base36( $sha1 );
	}

	return [ $text, $finalTitle ];
}
3654
/**
 * Fetch the wikitext of a template without relying on Parser state,
 * following redirects for at most three loop iterations and collecting the
 * pages touched along the way as dependencies.
 *
 * @param LinkTarget $page Template to fetch
 * @param Parser|false $parser When given, supplies the context title and
 *   the current-revision lookup
 * @return array With keys 'revision-record' (revision or false), 'text'
 *   (string or false), 'finalTitle' and 'deps' (list of arrays with
 *   'title', 'page_id' and 'rev_id')
 */
public static function statelessFetchTemplate( $page, $parser = false ) {
	$title = Title::castFromLinkTarget( $page ); // for compatibility with return type
	$text = false;
	$skip = false;
	$finalTitle = $title;
	$deps = [];
	$revRecord = null;
	$contextTitle = $parser ? $parser->getTitle() : null;

	# Loop to fetch the article, with up to 2 redirects

	# Note that $title (including redirect targets) could be
	# external; we do allow hooks a chance to redirect the
	# external title to a local one (which might be useful), but
	# are careful not to add external titles to the dependency
	# list. (T362221)

	$services = MediaWikiServices::getInstance();
	$revLookup = $services->getRevisionLookup();
	$hookRunner = new HookRunner( $services->getHookContainer() );

	# Dependency entry for a title without a usable revision
	$unrevisionedDep = static fn ( $depTitle ) => [
		'title' => $depTitle,
		'page_id' => $depTitle->getArticleID(),
		'rev_id' => null,
	];

	for ( $attempt = 0; $attempt < 3 && is_object( $title ); $attempt++ ) {
		# Give extensions a chance to select the revision instead
		$revRecord = null; # Assume no hook
		$origTitle = $title;
		$titleChanged = false;
		$hookRunner->onBeforeParserFetchTemplateRevisionRecord(
			# The $title is a not a PageIdentity, as it may
			# contain fragments or even represent an attempt to transclude
			# a broken or otherwise-missing Title, which the hook may
			# fix up. Similarly, the $contextTitle may represent a special
			# page or other page which "exists" as a parsing context but
			# is not in the DB.
			$contextTitle, $title,
			$skip, $revRecord
		);

		if ( $skip ) {
			$text = false;
			if ( !$title->isExternal() ) {
				$deps[] = $unrevisionedDep( $title );
			}
			break;
		}

		# Get the revision
		if ( !$revRecord ) {
			$revRecord = $parser
				? $parser->fetchCurrentRevisionRecordOfTitle( $title )
				: $revLookup->getRevisionByTitle( $title );
		}

		if ( $revRecord ) {
			# Update title, as $revRecord may have been changed by hook
			$title = Title::newFromLinkTarget(
				$revRecord->getPageAsLinkTarget()
			);
			// Assuming title is not external if we've got a $revRecord
			$deps[] = [
				'title' => $title,
				'page_id' => $revRecord->getPageId(),
				'rev_id' => $revRecord->getId(),
			];
		} elseif ( !$title->isExternal() ) {
			$deps[] = $unrevisionedDep( $title );
		}

		if ( !$title->equals( $origTitle ) ) {
			# If we fetched a rev from a different title, register
			# the original title too...
			if ( !$origTitle->isExternal() ) {
				$deps[] = $unrevisionedDep( $origTitle );
			}
			$titleChanged = true;
		}

		# If there is no current revision, there is no page
		if ( $revRecord === null || $revRecord->getId() === null ) {
			$services->getLinkCache()->addBadLinkObj( $title );
		}

		if ( $revRecord ) {
			$hasMainSlot = $revRecord->hasSlot( SlotRecord::MAIN );
			if ( $titleChanged && !$hasMainSlot ) {
				// We've added this (missing) title to the dependencies;
				// give the hook another chance to redirect it to an
				// actual page.
				$text = false;
				$finalTitle = $title;
				continue;
			}
			if ( $hasMainSlot ) { // T276476
				$content = $revRecord->getContent( SlotRecord::MAIN );
				$text = $content ? $content->getWikitextForTransclusion() : null;
			} else {
				$text = false;
			}

			if ( $text === false || $text === null ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() === NS_MEDIAWIKI ) {
			# No revision: fall back to the interface message of that name
			$messageKey = $services->getContentLanguage()->lcfirst( $title->getText() );
			$message = wfMessage( $messageKey )->inContentLanguage();
			$text = $message->exists() ? $message->plain() : false;
			break;
		} else {
			break;
		}

		// @phan-suppress-next-line PhanPossiblyUndeclaredVariable Only reached when content is set
		if ( !$content ) {
			break;
		}

		# Redirect?
		$finalTitle = $title;
		$title = $content->getRedirectTarget();
	}

	return [
		// previously, when this also returned a Revision object, we set
		// 'revision-record' to false instead of null if it was unavailable,
		// so that callers to use isset and then rely on the revision-record
		// key instead of the revision key, even if there was no corresponding
		// object - we continue to set to false here for backwards compatibility
		'revision-record' => $revRecord ?: false,
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps
	];
}
3808
/**
 * Fetch a file and register it as an image dependency of the output; when
 * the file's own title differs from the requested link, that title is
 * registered as well and becomes the returned title.
 *
 * @param LinkTarget $link Requested file
 * @param array $options Forwarded to fetchFileNoRegister()
 * @return array [ file or false, Title ]
 */
public function fetchFileAndTitle( LinkTarget $link, array $options = [] ) {
	$file = $this->fetchFileNoRegister( $link, $options );

	if ( $file ) {
		$time = $file->getTimestamp();
		$sha1 = $file->getSha1();
	} else {
		$time = false;
		$sha1 = false;
	}

	# Register the file as a dependency...
	$this->mOutput->addImage( $link, $time, $sha1 );

	$resolved = $link;
	if ( $file && !$link->isSameLinkAs( $file->getTitle() ) ) {
		# Update fetched file title after resolving redirects, etc.
		$resolved = $file->getTitle();
		$this->mOutput->addImage( $resolved, $time, $sha1 );
	}

	// for return type compat
	return [ $file, Title::newFromLinkTarget( $resolved ) ];
}
3833
/**
 * Find a file in the repo group without registering it as a dependency.
 *
 * @param LinkTarget $link Requested file
 * @param array $options Keys read here: 'broken' (if set, no lookup is made
 *   and false is returned) and 'sha1' (if set, look up by key instead of by
 *   name); the whole array is forwarded to the repo group
 * @return mixed False when 'broken' is set, otherwise the repo group result
 */
protected function fetchFileNoRegister( LinkTarget $link, array $options = [] ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		return false;
	}

	$repoGroup = MediaWikiServices::getInstance()->getRepoGroup();

	if ( isset( $options['sha1'] ) ) {
		// get by (sha1,timestamp)
		return $repoGroup->findFileFromKey( $options['sha1'], $options );
	}

	// get by (name,timestamp)
	return $repoGroup->findFile( $link, $options );
}
3857
/**
 * Fetch the content of a page on another wiki over HTTP, going through the
 * WAN cache.
 *
 * @param LinkTarget $link Page to transclude
 * @param string $action Value used for the 'action' URL parameter
 * @return string The fetched content, or a message (in the content language)
 *   explaining why nothing could be transcluded
 */
public function interwikiTransclude( LinkTarget $link, $action ) {
	if ( !$this->svcOptions->get( MainConfigNames::EnableScaryTranscluding ) ) {
		return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
	}

	// TODO: extract relevant functionality from Title
	$title = Title::newFromLinkTarget( $link );

	$url = $title->getFullURL( [ 'action' => $action ] );
	if ( strlen( $url ) > 1024 ) {
		return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
	}

	// Remote wiki ID, or false
	$wikiId = $title->getTransWikiID();
	$hasWikiId = ( $wikiId !== false );

	// Captured here because __METHOD__ inside the closure would name the closure.
	$caller = __METHOD__;
	$cache = $this->wanCache;

	$cacheKey = $cache->makeGlobalKey(
		'interwiki-transclude',
		$hasWikiId ? $wikiId : 'external',
		sha1( $url )
	);
	$expiry = $this->svcOptions->get( MainConfigNames::TranscludeCacheExpiry );

	$fetch = function ( $oldValue, &$ttl ) use ( $url, $caller, $cache ) {
		$request = $this->httpRequestFactory->create( $url, [], $caller );

		// Status object
		$status = $request->execute();
		$ok = $status->isOK();
		if ( !$ok ) {
			// Do not cache failures
			$ttl = $cache::TTL_UNCACHEABLE;
		} elseif ( $request->getResponseHeader( 'X-Database-Lagged' ) !== null ) {
			// The remote end reported lag: keep the result only briefly
			$ttl = min( $cache::TTL_LAGGED, $ttl );
		}

		return [
			'text' => $ok ? $request->getContent() : null,
			'code' => $request->getStatus()
		];
	};

	$cacheOptions = [
		'checkKeys' => $hasWikiId
			? [ $cache->makeGlobalKey( 'interwiki-page', $wikiId, $title->getDBkey() ) ]
			: [],
		'pcGroup' => 'interwiki-transclude:5',
		'pcTTL' => $cache::TTL_PROC_LONG
	];

	$data = $cache->getWithSetCallback( $cacheKey, $expiry, $fetch, $cacheOptions );

	if ( is_string( $data['text'] ) ) {
		return $data['text'];
	}

	if ( $data['code'] != 200 ) {
		// No content, and the HTTP status is worth reporting
		return wfMessage( 'scarytranscludefailed-httpstatus' )
			->params( $url, $data['code'] )->inContentLanguage()->text();
	}

	// No content although the status was 200: report a generic failure
	return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
}
3928
/**
 * Resolve a triple-brace argument ({{{name|default}}}) against a frame.
 *
 * @param array $piece Must provide 'title' (expanded to get the argument
 *   name) and 'parts' (node list; its first item supplies the default)
 * @param PPFrame $frame Frame the argument is looked up in
 * @return array Either [ 'object' => ... ] (default value, or the literal
 *   {{{...}}} when nothing matched) or [ 'text' => ... ] (the argument
 *   value, with a warning comment appended if the include size limit was hit)
 */
public function argSubstitution( array $piece, PPFrame $frame ) {
	$parts = $piece['parts'];
	$nameWithSpaces = $frame->expand( $piece['title'] );
	$text = $frame->getArgument( trim( $nameWithSpaces ) );
	$notInFrame = ( $text === false );
	$object = false;

	if ( $notInFrame && $parts->getLength() > 0 ) {
		$defaultApplies = $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() );
		if ( $defaultApplies ) {
			// No match in frame, use the supplied default
			$object = $parts->item( 0 )->getChildren();
		}
	}

	// The size is accounted for (and the warning raised) whichever result is returned.
	$withinLimit = $this->incrementIncludeSize( 'arg', strlen( $text ) );
	if ( !$withinLimit ) {
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $notInFrame && $object === false ) {
		// No match anywhere: reproduce the original {{{...}}} construct
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
	}

	if ( $object !== false ) {
		return [ 'object' => $object ];
	}

	if ( !$withinLimit ) {
		$text .= '<!-- WARNING: argument omitted, expansion size too large -->';
	}

	return [ 'text' => $text ];
}
3974
/**
 * Ask the Parsoid site configuration whether the given tag needs nowiki
 * stripped when used through the tag parser function.
 *
 * @param string $lowerTagName Tag name; the parameter name indicates it is
 *   expected in lower case
 * @return bool The site configuration's answer, unchanged
 */
public function tagNeedsNowikiStrippedInTagPF( string $lowerTagName ): bool {
	return MediaWikiServices::getInstance()
		->getParsoidSiteConfig()
		->tagNeedsNowikiStrippedInTagPF( $lowerTagName );
}
3983
/**
 * Process an extension tag: either run its tag hook and hide the result
 * behind a strip marker, or (when not producing HTML) rebuild the tag as text.
 *
 * @param array $params Reads the keys 'name', 'attr' (optional), 'inner'
 *   (optional; absent means no content, e.g. a self-closed tag), 'close'
 *   and 'attributes' (optional; attributes passed via {{#tag:}})
 * @param PPFrame $frame Frame used to expand the pieces of $params
 * @param bool $processNowiki When true, <nowiki> is processed through its
 *   tag hook even if the output type is not HTML
 * @return string A strip marker, the raw hook output (marker type 'none'),
 *   or an expansion error string passed straight through
 * @throws UnexpectedValueException If a hook requests an unknown marker type
 */
public function extensionSubstitution( array $params, PPFrame $frame, bool $processNowiki = false ) {
	static $errorStr = '<span class="error">';

	$name = $frame->expand( $params['name'] );
	if ( str_starts_with( $name, $errorStr ) ) {
		// Probably expansion depth or node count exceeded. Just punt the
		// error up.
		return $name;
	}

	// Attributes in XML-like wikitext syntax
	$attrText = isset( $params['attr'] ) ? $frame->expand( $params['attr'] ) : '';
	if ( str_starts_with( $attrText, $errorStr ) ) {
		// Same as for the name
		return $attrText;
	}

	// The inner content is deliberately not checked for the error prefix:
	// it could legitimately happen to be the error string (T149622).
	$content = isset( $params['inner'] ) ? $frame->expand( $params['inner'] ) : null;

	$marker = sprintf(
		'%s-%s-%08X%s',
		self::MARKER_PREFIX,
		$name,
		$this->mMarkerIndex++,
		self::MARKER_SUFFIX
	);

	$normalizedName = strtolower( $name );
	$isNowiki = ( $normalizedName === 'nowiki' );
	$markerType = $isNowiki ? 'nowiki' : 'general';

	if ( $this->ot['html'] || ( $processNowiki && $isNowiki ) ) {
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		if ( isset( $params['attributes'] ) ) {
			// Attributes passed via the {{#tag:}} parser function; the ones
			// parsed from the attribute text win on conflict.
			$attributes += $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$normalizedName] ) ) {
			// $content may be null here, for example if the tag is self-closed.
			$output = call_user_func(
				$this->mTagHooks[$normalizedName],
				$content, $attributes, $this, $frame
			);
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $normalizedName ) . '</span>';
		}

		if ( is_array( $output ) ) {
			// Array form: element 0 is the output, the rest are flags
			$flags = $output;
			$output = $flags[0];
			$markerType = $flags['markerType'] ?? $markerType;
		}
	} else {
		// We're substituting a {{subst:#tag:}} parser function.
		// Turn the attributes it passed back into the XML-like string.
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$plainValue = $this->getStripState()->unstripBoth( $attrValue );
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $plainValue, ENT_COMPAT ) . '"';
			}
		}

		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			$close = $params['close'] === null ? '' : $frame->expand( $params['close'] );
			if ( str_starts_with( $close, $errorStr ) ) {
				// Same as for the name
				return $close;
			}
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	}
	if ( $markerType !== 'nowiki' && $markerType !== 'general' ) {
		throw new UnexpectedValueException( __METHOD__ . ': invalid marker type' );
	}

	if ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} else {
		$this->mStripState->addGeneral( $marker, $output );
	}

	return $marker;
}
4088
/**
 * Add to one of the include size counters, unless that would exceed the
 * maximum include size from the parser options.
 *
 * @param string $type Key into $this->mIncludeSizes
 * @param int $size Amount to add
 * @return bool False if the limit would be exceeded (counter left untouched),
 *   true if the counter was increased
 */
private function incrementIncludeSize( $type, $size ) {
	$newTotal = $this->mIncludeSizes[$type] + $size;
	if ( $newTotal > $this->mOptions->getMaxIncludeSize() ) {
		return false;
	}

	$this->mIncludeSizes[$type] = $newTotal;
	return true;
}
4104
4110 $this->mExpensiveFunctionCount++;
4111 return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
4112 }
4113
/**
 * Strip double-underscore magic words (__TOC__, __NOTOC__, __NOINDEX__, ...)
 * from the text and apply their effects to the parser state and output.
 *
 * @param string $text Text to scan
 * @return string The text with the magic words removed; the first __TOC__
 *   is replaced by the TOC placeholder
 */
private function handleDoubleUnderscore( $text ) {
	// __TOC__ is handled separately because its position must be recorded.
	$tocWord = $this->magicWordFactory->get( 'toc' );
	if ( $tocWord->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		// The first occurrence becomes a placeholder that is filled in with
		// the TOC at the end; every further occurrence is dropped.
		$text = $tocWord->replace( self::TOC_PLACEHOLDER, $text, 1 );
		$text = $tocWord->replace( '', $text );

		// Recorded as a page property like every other double underscore
		$this->mOutput->setPageProperty( 'toc', '' );
	}

	// Match and remove all the others in one go
	$found = $this->magicWordFactory->getDoubleUnderscoreArray()->matchAndRemove( $text );
	$this->mDoubleUnderscores = $found;

	if ( isset( $found['nogallery'] ) ) {
		$this->mOutput->setNoGallery( true );
	}

	// An explicit __TOC__ wins over __NOTOC__
	if ( !$this->mForceTocPosition && isset( $found['notoc'] ) ) {
		$this->mShowToc = false;
	}

	if ( isset( $found['hiddencat'] )
		&& $this->getTitle()->getNamespace() === NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}

	// (T10068) Allow control over whether robots index a page.
	// 'index' is applied after 'noindex', so __INDEX__ always overrides
	// __NOINDEX__ (T16899).
	if ( isset( $found['noindex'] ) && $this->getTitle()->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'noindex' );
		$this->addTrackingCategory( 'noindex-category' );
	}
	if ( isset( $found['index'] ) && $this->getTitle()->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'index' );
		$this->addTrackingCategory( 'index-category' );
	}

	// Record every double underscore found as a page property
	foreach ( array_keys( $found ) as $key ) {
		$this->mOutput->setPageProperty( $key, '' );
	}

	return $text;
}
4171
/**
 * Add a tracking category to the current parser output for the current page.
 *
 * @param string $msg Forwarded unchanged to the tracking-categories service
 * @return mixed Whatever the tracking-categories service returns
 */
public function addTrackingCategory( $msg ) {
	$output = $this->mOutput;
	$page = $this->getPage();

	return $this->trackingCategories->addTrackingCategory( $output, $msg, $page );
}
4183
/**
 * Build a Message in the parser's target language, bound to the current page.
 *
 * @param string $msg Message key
 * @param mixed ...$args Message parameters, forwarded to wfMessage()
 * @return Message
 */
public function msg( string $msg, ...$args ): Message {
	$message = wfMessage( $msg, ...$args );
	$message = $message->inLanguage( $this->getTargetLanguage() );

	return $message->page( $this->getPage() );
}
4202
/**
 * Recursively reduce the children of a DOM node to what may appear in a
 * table-of-contents line. The DOM is modified in place.
 *
 * - <style> and <script> elements are removed together with their content.
 * - Allowed elements are kept but lose all attributes, except dir="rtl" or
 *   dir="ltr" on <span>; a <span> left without children is removed.
 * - Any other element is unwrapped: its children take its place and are then
 *   cleaned in turn.
 * - Comment nodes are removed.
 *
 * @param Node $container Element or DocumentFragment whose children are cleaned
 */
private function cleanUpTocLine( Node $container ) {
	'@phan-var Element|DocumentFragment $container';
	# Strip out HTML
	# Allowed tags are:
	# * <sup> and <sub> (T10393)
	# * <i> (T28375)
	# * <b> (r105284)
	# * <bdi> (T74884)
	# * <span dir="rtl"> and <span dir="ltr"> (T37167)
	# * <s> and <strike> (T35715)
	# * <q> (T251672)
	# We strip any parameter from accepted tags, except dir="rtl|ltr" from <span>,
	# to allow setting directionality in toc items.
	$allowedTags = [ 'span', 'sup', 'sub', 'bdi', 'i', 'b', 's', 'strike', 'q' ];
	$node = $container->firstChild;
	while ( $node !== null ) {
		// Remember the successor now: $node may be detached below.
		$next = $node->nextSibling;
		if ( $node instanceof Element ) {
			$nodeName = DOMCompat::nodeName( $node );
			if ( in_array( $nodeName, [ 'style', 'script' ], true ) ) {
				# Remove any <style> or <script> tags (T198618)
				DOMCompat::remove( $node );
			} elseif ( in_array( $nodeName, $allowedTags, true ) ) {
				// Keep tag, remove attributes. Collect first, remove afterwards,
				// so the attribute list is not modified while iterating it.
				$removeAttrs = [];
				foreach ( $node->attributes as $attr ) {
					if (
						$nodeName === 'span' && $attr->name === 'dir'
						&& ( $attr->value === 'rtl' || $attr->value === 'ltr' )
					) {
						// Keep <span dir="rtl"> and <span dir="ltr">
						continue;
					}
					$removeAttrs[] = $attr;
				}
				foreach ( $removeAttrs as $attr ) {
					$node->removeAttributeNode( $attr );
				}
				$this->cleanUpTocLine( $node );
				# Strip '<span></span>', which is the result from the above if
				# <span id="foo"></span> is used to produce an additional anchor
				# for a section.
				if ( $nodeName === 'span' && !$node->hasChildNodes() ) {
					DOMCompat::remove( $node );
				}
			} else {
				// Strip tag: hoist the children into the parent and continue
				// with the first hoisted child so that they get cleaned too.
				// If the element has no children (e.g. <br>), keep the
				// original next sibling; otherwise the walk would stop here
				// and everything after the empty element would be left as is.
				$next = $node->firstChild ?? $next;
				// phpcs:ignore Generic.CodeAnalysis.AssignmentInCondition.FoundInWhileCondition
				while ( $childNode = $node->firstChild ) {
					$node->parentNode->insertBefore( $childNode, $node );
				}
				DOMCompat::remove( $node );
			}
		} elseif ( $node instanceof Comment ) {
			// Extensions may add comments to headings;
			// these shouldn't appear in the ToC either.
			DOMCompat::remove( $node );
		}
		$node = $next;
	}
}
4264
/**
 * Post-process the <h1>..<h6> headings found in already-rendered text.
 *
 * For every heading this computes a unique anchor (plus an optional fallback
 * anchor), adds an entry to a TOCData object, and rebuilds the heading tag
 * with data-mw-anchor / data-mw-fallback-anchor attributes and, where
 * applicable, an <mw:editsection> placeholder. The original wikitext is
 * preprocessed a second time so that a codepoint offset can be recorded for
 * each section that belongs to the page itself.
 *
 * Side effects visible in this method: the output type is temporarily
 * switched to OT_WIKI and restored afterwards; new-section flags, TOC data
 * and TOC output flags may be set on $this->mOutput; $this->mShowToc is set
 * to true when 'forcetoc' is present.
 *
 * @param string $text Rendered text containing the heading tags
 * @param string $origText Wikitext that is preprocessed to locate the
 *   headings and compute their offsets
 * @param bool $isMain When true (and the parser options do not suppress the
 *   TOC), TOC data and flags are recorded on the parser output
 * @return string $text with the headings replaced by the rebuilt ones and,
 *   if required, the TOC placeholder appended to the text before the first heading
 */
4280 private function finalizeHeadings( $text, $origText, $isMain = true ) {
4281 # Inhibit editsection links if requested in the page
4282 if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
4283 $maybeShowEditLink = false;
4284 } else {
4285 $maybeShowEditLink = true; /* Actual presence will depend on post-cache transforms */
4286 }
4287
4288 # Get all headlines for numbering them and adding funky stuff like [edit]
4289 # links - this is for later, but we need the number of headlines right now
4290 # NOTE: white space in headings have been trimmed in handleHeadings. They shouldn't
4291 # be trimmed here since whitespace in HTML headings is significant.
4292 $matches = [];
4293 $numMatches = preg_match_all(
4294 '/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
4295 $text,
4296 $matches
4297 );
4298
4299 # if there are fewer than 4 headlines in the article, do not show TOC
4300 # unless it's been explicitly enabled.
4301 $enoughToc = $this->mShowToc &&
4302 ( ( $numMatches >= 4 ) || $this->mForceTocPosition );
4303
4304 # Allow user to stipulate that a page should have a "new section"
4305 # link added via __NEWSECTIONLINK__
4306 if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
4307 $this->mOutput->setNewSection( true );
4308 }
4309
4310 # Allow user to remove the "new section"
4311 # link via __NONEWSECTIONLINK__
4312 if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
4313 $this->mOutput->setHideNewSection( true );
4314 }
4315
4316 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
4317 # override above conditions and always show TOC above first header
4318 if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
4319 $this->mShowToc = true;
4320 $enoughToc = true;
4321 }
4322
4323 # headline counter
4324 $headlineCount = 0;
4325 $haveTocEntries = false;
4326
4327 # Ugh .. the TOC should have neat indentation levels which can be
4328 # passed to the skin functions. These are determined here
4329 $full = '';
4330 $head = [];
4331 $level = 0;
4332 $tocData = new TOCData();
4333 $markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
4334 $baseTitleText = $this->getTitle()->getPrefixedDBkey();
// The original wikitext is re-preprocessed in OT_WIKI mode; the previous
// output type is restored after the loop over the headlines.
4335 $oldType = $this->mOutputType;
4336 $this->setOutputType( self::OT_WIKI );
4337 $frame = $this->getPreprocessor()->newFrame();
4338 $root = $this->preprocessToDom( $origText );
4339 $node = $root->getFirstChild();
4340 $cpOffset = 0;
4341 $refers = [];
4342
// Index 3 is the 'header' named group of the pattern above.
4343 $headlines = $numMatches !== false ? $matches[3] : [];
4344
4345 $maxTocLevel = $this->svcOptions->get( MainConfigNames::MaxTocLevel );
4346 $domDocument = DOMUtils::parseHTML( '' );
4347 foreach ( $headlines as $headline ) {
4348 $isTemplate = false;
4349 $titleText = false;
4350 $sectionIndex = false;
4351 $markerMatches = [];
4352 if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
4353 $serial = (int)$markerMatches[1];
4354 [ $titleText, $sectionIndex ] = $this->mHeadings[$serial];
4355 $isTemplate = ( $titleText != $baseTitleText );
4356 $headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
4357 }
4358
4359 $sectionMetadata = SectionMetadata::fromLegacy( [
4360 "fromtitle" => $titleText ?: null,
4361 "index" => $sectionIndex === false
4362 ? '' : ( ( $isTemplate ? 'T-' : '' ) . $sectionIndex )
4363 ] );
4364 $tocData->addSection( $sectionMetadata );
4365
4366 $oldLevel = $level;
4367 $level = (int)$matches[1][$headlineCount];
4368 $tocData->processHeading( $oldLevel, $level, $sectionMetadata );
4369
4370 if ( $tocData->getCurrentTOCLevel() < $maxTocLevel ) {
4371 $haveTocEntries = true;
4372 }
4373
4374 # The safe header is a version of the header text safe to use for links
4375
4376 # Remove link placeholders by the link text.
4377 # <!--LINK number-->
4378 # turns into
4379 # link text with suffix
4380 # Do this before unstrip since link text can contain strip markers
4381 $safeHeadline = $this->replaceLinkHoldersText( $headline );
4382
4383 # Avoid insertion of weird stuff like <math> by expanding the relevant sections
4384 $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );
4385
4386 // Run Tidy to convert wikitext entities to HTML entities (T355386),
4387 // conveniently also giving us a way to handle French spaces (T324763)
4388 $safeHeadline = $this->tidy->tidy( $safeHeadline, [ Sanitizer::class, 'armorFrenchSpaces' ] );
4389
4390 // Parse the heading contents as HTML. This makes it easier to strip out some HTML tags,
4391 // and ensures that we generate balanced HTML at the end (T218330).
4392 $headlineDom = DOMUtils::parseHTMLToFragment( $domDocument, $safeHeadline );
4393
4394 $this->cleanUpTocLine( $headlineDom );
4395
4396 // Serialize back to HTML
4397 $tocline = trim( DOMUtils::getFragmentInnerHTML( $headlineDom ) );
4398
4399 # For the anchor, strip out HTML-y stuff period
4400 $safeHeadline = trim( $headlineDom->textContent );
4401 # Save headline for section edit hint before it's normalized for the link
4402 $headlineHint = htmlspecialchars( $safeHeadline );
4403
4404 $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );
4405 $safeHeadline = self::normalizeSectionName( $safeHeadline );
4406
4407 $fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
4408 $linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
4409 $safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
4410 if ( $fallbackHeadline === $safeHeadline ) {
4411 # No reason to have both (in fact, we can't)
4412 $fallbackHeadline = false;
4413 }
4414
4415 # HTML IDs must be case-insensitively unique for IE compatibility (T12721).
4416 $arrayKey = strtolower( $safeHeadline );
4417 if ( $fallbackHeadline === false ) {
4418 $fallbackArrayKey = false;
4419 } else {
4420 $fallbackArrayKey = strtolower( $fallbackHeadline );
4421 }
4422
4423 # Create the anchor for linking from the TOC to the section
4424 $anchor = $safeHeadline;
4425 $fallbackAnchor = $fallbackHeadline;
// Duplicate anchors get a "_2", "_3", ... suffix; the empty-bodied for loops
// below just advance $i to the first suffix that is still unused.
4426 if ( isset( $refers[$arrayKey] ) ) {
4427 for ( $i = 2; isset( $refers["{$arrayKey}_$i"] ); ++$i );
4428 $anchor .= "_$i";
4429 $linkAnchor .= "_$i";
4430 $refers["{$arrayKey}_$i"] = true;
4431 } else {
4432 $refers[$arrayKey] = true;
4433 }
4434 if ( $fallbackHeadline !== false && isset( $refers[$fallbackArrayKey] ) ) {
4435 for ( $i = 2; isset( $refers["{$fallbackArrayKey}_$i"] ); ++$i );
4436 $fallbackAnchor .= "_$i";
4437 $refers["{$fallbackArrayKey}_$i"] = true;
4438 } else {
// NOTE(review): this branch is also taken when there is no fallback at all
// ($fallbackArrayKey === false), in which case PHP stores the entry under
// key 0. Looks harmless, but confirm it is intended.
4439 $refers[$fallbackArrayKey] = true;
4440 }
4441
4442 # Add the section to the section tree
4443 # Find the DOM node for this header
4444 $noOffset = ( $isTemplate || $sectionIndex === false );
// $node and $cpOffset persist across iterations of the outer loop, so the
// offset accumulates over the whole document.
4445 while ( $node && !$noOffset ) {
4446 if ( $node->getName() === 'h' ) {
4447 $bits = $node->splitHeading();
4448 if ( $bits['i'] == $sectionIndex ) {
4449 break;
4450 }
4451 }
4452 $cpOffset += mb_strlen(
4453 $this->mStripState->unstripBoth(
4454 $frame->expand( $node, PPFrame::RECOVER_ORIG )
4455 )
4456 );
4457 $node = $node->getNextSibling();
4458 }
4459 $sectionMetadata->line = $tocline;
4460 $sectionMetadata->codepointOffset = ( $noOffset ? null : $cpOffset );
4461 $sectionMetadata->anchor = $anchor;
4462 $sectionMetadata->linkAnchor = $linkAnchor;
4463
4464 if ( $maybeShowEditLink && $sectionIndex !== false ) {
4465 // Output edit section links as markers with styles that can be customized by skins
4466 if ( $isTemplate ) {
4467 # Put a T flag in the section identifier, to indicate to extractSections()
4468 # that sections inside <includeonly> should be counted.
4469 $editsectionPage = $titleText;
4470 $editsectionSection = "T-$sectionIndex";
4471 } else {
4472 $editsectionPage = $this->getTitle()->getPrefixedText();
4473 $editsectionSection = $sectionIndex;
4474 }
4475 // Construct a pseudo-HTML tag as a placeholder for the section edit link. It is replaced in
4476 // MediaWiki\OutputTransform\Stages\HandleSectionLinks with the real link.
4477 //
4478 // Any HTML markup in the input has already been escaped,
4479 // so we don't have to worry about a user trying to input one of these markers directly.
4480 //
4481 // We put the page and section in attributes to stop the language converter from
4482 // converting them, but put the headline hint in tag content
4483 // because it is supposed to be able to convert that.
4484 $editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage, ENT_COMPAT );
4485 $editlink .= '" section="' . htmlspecialchars( $editsectionSection, ENT_COMPAT ) . '"';
4486 $editlink .= '>' . $headlineHint . '</mw:editsection>';
4487 } else {
4488 $editlink = '';
4489 }
4490 // Reconstruct the original <h#> tag with added attributes. It is replaced in
4491 // MediaWiki\OutputTransform\Stages\HandleSectionLinks to add anchors and stuff.
4492 //
4493 // data-mw-... attributes are forbidden in Sanitizer::isReservedDataAttribute(),
4494 // so we don't have to worry about a user trying to input one of these markers directly.
4495 //
4496 // We put the anchors in attributes to stop the language converter from converting them.
4497 $head[$headlineCount] = "<h$level" . Html::expandAttributes( [
4498 'data-mw-anchor' => $anchor,
4499 'data-mw-fallback-anchor' => $fallbackAnchor,
4500 ] ) . $matches['attrib'][$headlineCount] . $headline . $editlink . "</h$level>";
4501
4502 $headlineCount++;
4503 }
4504
4505 $this->setOutputType( $oldType );
4506
4507 # Never ever show TOC if no headers (or suppressed)
4508 $suppressToc = $this->mOptions->getSuppressTOC();
4509 if ( !$haveTocEntries ) {
4510 $enoughToc = false;
4511 }
4512 $addTOCPlaceholder = false;
4513
4514 if ( $isMain && !$suppressToc ) {
4515 // We generally output the section information via the API
4516 // even if there isn't "enough" of a ToC to merit showing
4517 // it -- but the "suppress TOC" parser option is set when
4518 // any sections that might be found aren't "really there"
4519 // (ie, JavaScript content that might have spurious === or
4520 // <h2>: T307691) so we will *not* set section information
4521 // in that case.
4522 $this->mOutput->setTOCData( $tocData );
4523
4524 // T294950: Record a suggestion that the TOC should be shown.
4525 // Skins are free to ignore this suggestion and implement their
4526 // own criteria for showing/suppressing TOC (T318186).
4527 if ( $enoughToc ) {
4528 $this->mOutput->setOutputFlag( ParserOutputFlags::SHOW_TOC );
4529 if ( !$this->mForceTocPosition ) {
4530 $addTOCPlaceholder = true;
4531 }
4532 }
4533
4534 // If __NOTOC__ is used on the page (and not overridden by
4535 // __TOC__ or __FORCETOC__) set the NO_TOC flag to tell
4536 // the skin that although the section information is
4537 // valid, it should perhaps not be presented as a Table Of
4538 // Contents.
4539 if ( !$this->mShowToc ) {
4540 $this->mOutput->setOutputFlag( ParserOutputFlags::NO_TOC );
4541 }
4542 }
4543
4544 # split up and insert constructed headlines
// NOTE(review): unlike the preg_match_all() pattern at the top, this pattern
// does not allow spaces before the '>' of the closing tag; confirm that the
// input never contains a closing tag written as '</h2 >', otherwise the
// number of blocks and the number of headings would disagree.
4545 $blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
4546 $i = 0;
4547
4548 // build an array of document sections
4549 $sections = [];
4550 foreach ( $blocks as $block ) {
4551 // $head is zero-based, sections aren't.
4552 if ( empty( $head[$i - 1] ) ) {
4553 $sections[$i] = $block;
4554 } else {
4555 $sections[$i] = $head[$i - 1] . $block;
4556 }
4557
4558 $i++;
4559 }
4560
4561 if ( $addTOCPlaceholder ) {
4562 // append the TOC at the beginning
4563 // Top anchor now in skin
4564 // @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset At least one element when enoughToc is true
4565 $sections[0] .= self::TOC_PLACEHOLDER . "\n";
4566 }
4567
4568 $full .= implode( '', $sections );
4569
4570 return $full;
4571 }
4572
/**
 * Localize the section lines and section numbers of a table of contents,
 * modifying the section objects in place.
 *
 * @param ?TOCData $tocData TOC to localize; null is accepted and ignored
 * @param Language $lang Language used to format each component of the
 *   dotted section number
 * @param ?ILanguageConverter $converter If given, used to convert each
 *   section line to the converter's preferred variant
 */
private static function localizeTOC(
	?TOCData $tocData, Language $lang, ?ILanguageConverter $converter
) {
	if ( $tocData === null ) {
		// Nothing to do
		return;
	}

	$formatPiece = static function ( $piece ) use ( $lang ) {
		return $lang->formatNum( $piece );
	};

	foreach ( $tocData->getSections() as $section ) {
		if ( $converter ) {
			// T331316: the language converter state must not be reset here.
			// NOTE(review): the trailing `false` is presumably the flag that
			// keeps convertTo() from resetting that state — confirm against
			// ILanguageConverter::convertTo().
			$section->line = $converter->convertTo(
				$section->line, $converter->getPreferredVariant(), false
			);
		}

		// Format every component of the "1.2.3" style number separately and
		// join them again with the same separator.
		$section->number = implode(
			'.',
			array_map( $formatPiece, explode( '.', $section->number ) )
		);
	}
}
4610
/**
 * Transform wikitext before it is saved: strip NULs, normalize line endings,
 * optionally run the second pre-save pass (signatures, pipe tricks, subst),
 * and restore stripped content.
 *
 * @param string $text Wikitext to transform
 * @param PageReference $page Page the text belongs to
 * @param UserIdentity $user User performing the save
 * @param ParserOptions $options Parser options; getPreSaveTransform()
 *   decides whether the second pass runs
 * @param bool $clearState Whether to lock the parser and clear its state
 * @return string The transformed wikitext
 */
public function preSaveTransform(
	$text,
	PageReference $page,
	UserIdentity $user,
	ParserOptions $options,
	$clearState = true
) {
	if ( $clearState ) {
		// Held in a local so that the lock lives until this method returns.
		$magicScopeVariable = $this->lock();
	}
	$this->startParse( $page, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	// Strip U+0000 NULL (T159174), then normalize line endings (which also
	// trims trailing whitespace). TextContent subclasses should already do
	// the latter; it is kept for callers that invoke PST directly.
	$text = TextContent::normalizeLineEndings( str_replace( "\000", '', $text ) );

	if ( $options->getPreSaveTransform() ) {
		$text = $this->pstPass2( $text, $user );
	}

	// Unstrip, then trim trailing whitespace once more because the steps
	// above can introduce it.
	$text = rtrim( $this->mStripState->unstripBoth( $text ) );

	$this->hookRunner->onParserPreSaveTransformComplete( $this, $text );

	// Reset
	$this->setUser( null );

	return $text;
}
4658
/**
 * Second pass of the pre-save transform: substitutes variables, expands
 * signatures (~~~, ~~~~, ~~~~~) and resolves context links ("pipe tricks").
 *
 * @param string $text Wikitext being saved
 * @param UserIdentity $user User whose signature replaces ~~~ and ~~~~
 * @return string Transformed wikitext
 */
private function pstPass2( $text, UserIdentity $user ) {
	# Note: This is the timestamp saved as hardcoded wikitext to the database, we use
	# $this->contLang here in order to give everyone the same signature and use the default one
	# rather than the one selected in each user's preferences. (see also T14815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

	$d = $this->contLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call.

	# Signatures
	if ( str_contains( $text, '~~~' ) ) {
		$sigText = $this->getUserSig( $user );
		// strtr() tries the longest key first, so ~~~~~ and ~~~~ are not
		// consumed by the ~~~ rule.
		$text = strtr( $text, [
			'~~~~~' => $d,
			'~~~~' => "$sigText $d",
			'~~~' => $sigText
		] );
		# The main two signature forms used above are time-sensitive
		$this->setOutputFlag( ParserOutputFlags::USER_SIGNATURE, 'User signature detected' );
	}

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page（context）|]] (double-width brackets, added in r40257)
	// The brackets in this pattern must be the double-width characters
	// U+FF08 / U+FF09: with ASCII parentheses they would merely form a group
	// and the pattern would match nearly every [[...|]] link.
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?（$tc+）)\\|]]/";
	// [[ns:page (context), context|]] (using single, double-width or Arabic comma)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |，|، )$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	$t = $this->getTitle()->getText();
	$m = [];
	$hasContext = preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m )
		|| ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" !== '' );

	if ( $hasContext ) {
		// The context comes from the page title, which may legally contain
		// "$" or "\". A callback is used so that those characters are
		// inserted literally instead of being read as backreferences.
		$prefix = $m[1];
		$context = $m[2];
		$text = preg_replace_callback(
			$p2,
			static function ( array $link ) use ( $prefix, $context ) {
				return "[[{$prefix}{$link[1]}{$context}|{$link[1]}]]";
			},
			$text
		);
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	return $text;
}
4728
/**
 * Build the wikitext signature for a user.
 *
 * @param UserIdentity $user User to sign for
 * @param string|false|null $nickname Nickname to use; false means "read the
 *   'nickname' user option"
 * @param bool|null $fancySig Whether the nickname is raw signature wikitext;
 *   null means "read the 'fancysig' user option"
 * @return string Signature wikitext: the cleaned-up custom signature if it is
 *   enabled and valid, otherwise the default signature message
 */
public function getUserSig( UserIdentity $user, $nickname = false, $fancySig = null ) {
	$username = $user->getName();

	// Anything not passed in is taken from the user's options.
	if ( $nickname === false ) {
		$nickname = $this->userOptionsLookup->getOption( $user, 'nickname' );
	}
	if ( $fancySig === null ) {
		$fancySig = $this->userOptionsLookup->getBoolOption( $user, 'fancysig' );
	}

	if ( $nickname === null || $nickname === '' ) {
		// An empty value gives the default signature, even with fancysig on
		$nickname = $username;
	} elseif ( mb_strlen( $nickname ) > $this->svcOptions->get( MainConfigNames::MaxSigChars ) ) {
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has overlong signature." );
	} elseif ( $fancySig !== false ) {
		// The signature may contain markup, so it has to pass validation.
		$accepted = ( $this->validateSig( $nickname ) !== false );

		// Additional check by the signature validator, when configured
		$validationMode = $this->svcOptions->get( MainConfigNames::SignatureValidation );
		if ( $accepted && $validationMode === 'disallow' ) {
			$validatorOptions = new ParserOptions(
				$this->mOptions->getUserIdentity(),
				$this->contLang
			);
			$validator = $this->signatureValidatorFactory
				->newSignatureValidator( $user, null, $validatorOptions );
			// A falsy result from the validator means the signature is accepted
			$accepted = !$validator->validateSignature( $nickname );
		}

		if ( $accepted ) {
			// Valid custom signature: clean it up (if needed) and use it
			return $this->cleanSig( $nickname, true );
		}

		// Validation failed: fall back to the default signature
		$nickname = $username;
		$this->logger->debug( __METHOD__ . ": $username has invalid signature." );
	}

	// Make sure the nickname doesn't get a signature inside the signature
	$nickname = self::cleanSigInSig( $nickname );

	// Still here, so the result is a link to the user page
	$userText = wfEscapeWikiText( $username );
	$nickText = wfEscapeWikiText( $nickname );

	$msgName = 'signature-anon';
	if ( $this->userNameUtils->isTemp( $username ) ) {
		$msgName = 'signature-temp';
	} elseif ( $user->isRegistered() ) {
		$msgName = 'signature';
	}

	return wfMessage( $msgName, $userText, $nickText )
		->inContentLanguage()
		->page( $this->getPage() )
		->text();
}
4805
/**
 * Check that a custom signature is a well-formed XML fragment.
 *
 * @param string $text Signature text
 * @return string|false The text itself if it is well-formed, false otherwise
 */
public function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}

	return false;
}
4816
/**
 * Clean up signature text: force substitution on every template call, remove
 * nested signature markers, and expand the result.
 *
 * @param string $text Signature text
 * @param bool $parsing Whether a parse is already in progress. If false, the
 *   parser is locked and a fresh OT_PREPROCESS parse is started first, and
 *   strip markers are restored in the result.
 * @return string The cleaned signature; the text is returned unchanged when
 *   the clean-signatures parser option is off
 */
public function cleanSig( $text, $parsing = false ) {
	if ( !$parsing ) {
		// Held in a local so that the lock lives until this method returns.
		$magicScopeVariable = $this->lock();
		$this->startParse(
			$this->mTitle,
			ParserOptions::newFromUser( RequestContext::getMain()->getUser() ),
			self::OT_PREPROCESS,
			true
		);
	}

	// The feature can be switched off
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	// @todo FIXME: Regex doesn't respect extension tags or nowiki
	//  => Move this logic to braceSubstitution()
	$substWord = $this->magicWordFactory->get( 'subst' );
	// Every "{{" that is not already followed by the subst keyword...
	$needsSubst = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase();
	// ...gets the keyword's first synonym inserted after it.
	$withSubst = '{{' . $substWord->getSynonym( 0 );

	$text = self::cleanSigInSig( preg_replace( $needsSubst, $withSubst, $text ) );

	$dom = $this->preprocessToDom( $text );
	$expanded = $this->getPreprocessor()->newFrame()->expand( $dom );

	return $parsing
		? $expanded
		: $this->mStripState->unstripBoth( $expanded );
}
4862
/**
 * Strip signature markers (runs of three to five tildes) from a string.
 *
 * @param string $text
 * @return string Text with every ~~~, ~~~~ and ~~~~~ sequence removed
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
4874
/**
 * Replace table of contents markers in parsed text.
 *
 * The first match of self::TOC_PLACEHOLDER_REGEX is replaced by $toc; every
 * later match is removed.
 *
 * @param string $text Text containing TOC placeholders
 * @param string $toc Replacement for the first placeholder
 * @return string Result of the replacement
 */
public static function replaceTableOfContentsMarker( $text, $toc ) {
	// Removing the additional markers is not strictly necessary, but it keeps
	// the pass idempotent when it runs more than once on the same content and
	// $toc inserts no TOC markers itself. Because everything happens in a single
	// callback-driven pass (instead of replacing with $toc first and cleaning
	// up leftovers in a second pass), markers that $toc might contain (which,
	// as of 2024-05, should not be possible) are preserved.
	$seen = 0;

	return preg_replace_callback(
		self::TOC_PLACEHOLDER_REGEX,
		static function ( array $matches ) use ( &$seen, $toc ): string {
			// Only the very first placeholder receives the TOC
			return $seen++ === 0 ? $toc : '';
		},
		$text
	);
}
4908
/**
 * Set up the parser state for an external parse.
 *
 * Delegates to startParse() and then optionally records a revision ID.
 *
 * @param ?PageReference $page
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @param bool $clearState Whether to reset the parser state
 * @param int|null $revId Stored as the current revision ID when not null
 */
public function startExternalParse( ?PageReference $page, ParserOptions $options,
	$outputType, $clearState = true, $revId = null
) {
	$this->startParse( $page, $options, $outputType, $clearState );

	if ( $revId === null ) {
		return;
	}
	$this->mRevisionId = $revId;
}
4928
/**
 * Store page, options and output type, and optionally clear the parser state.
 *
 * @param ?PageReference $page
 * @param ParserOptions $options
 * @param int $outputType One of the self::OT_* constants
 * @param bool $clearState Whether to call clearState()
 */
private function startParse( ?PageReference $page, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->setPage( $page );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
4945
/**
 * Preprocess message text.
 *
 * The text is run through preprocess(). A static guard prevents infinite
 * recursion: while one call is in progress, nested calls return their input
 * unchanged.
 *
 * @param string $text Message text
 * @param ParserOptions $options
 * @param ?PageReference $page Page used as context; falls back to the
 *   parser's current title when null
 * @return string Preprocessed text, or $text unchanged for a re-entrant call
 */
public function transformMsg( $text, ParserOptions $options, ?PageReference $page = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	try {
		return $this->preprocess( $text, $page ?? $this->mTitle, $options );
	} finally {
		# Always release the guard. Without this, an exception thrown by
		# preprocess() would leave the static flag set, and every later call
		# in this process would return its text untransformed.
		$executing = false;
	}
}
4969
/**
 * Register a callback for an extension tag.
 *
 * The tag name is lower-cased and also added to the strip list if it is not
 * there yet.
 *
 * @param string $tag Tag name; must not contain '<', '>', CR or LF
 * @param callable $callback Callback to store for the tag
 * @return callable|null The callback previously registered for the tag, if any
 * @throws InvalidArgumentException If the tag name contains a forbidden character
 */
public function setHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );

	$badChar = [];
	if ( preg_match( '/[<>\r\n]/', $tag, $badChar ) ) {
		throw new InvalidArgumentException( "Invalid character {$badChar[0]} in setHook('$tag', ...) call" );
	}

	$previous = $this->mTagHooks[$tag] ?? null;
	$this->mTagHooks[$tag] = $callback;

	$alreadyListed = in_array( $tag, $this->mStripList );
	if ( !$alreadyListed ) {
		$this->mStripList[] = $tag;
	}

	return $previous;
}
5002
/**
 * Remove all registered tag hooks and empty the strip list.
 */
public function clearTagHooks() {
	$this->mStripList = [];
	$this->mTagHooks = [];
}
5011
/**
 * Register a parser function callback.
 *
 * Stores the callback together with its flags and registers every synonym of
 * the magic word $id in the function synonym table. Synonyms of
 * case-insensitive magic words are lower-cased with the content language. A
 * leading '#' is added unless SFH_NO_HASH is set, and a trailing ':' is removed.
 *
 * @param string $id Magic word ID
 * @param callable $callback
 * @param int $flags Bit field of self::SFH_* constants
 * @return callable|null The callback previously registered for $id, if any
 */
public function setFunctionHook( $id, callable $callback, $flags = 0 ) {
	$previous = $this->mFunctionHooks[$id][0] ?? null;
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Add to function cache
	$magicWord = $this->magicWordFactory->get( $id );
	$names = $magicWord->getSynonyms();
	# 1 for case-sensitive words, 0 otherwise; used as the first-level key
	$caseKey = intval( $magicWord->isCaseSensitive() );

	foreach ( $names as $name ) {
		if ( !$caseKey ) {
			# Case-insensitive: store in lower case
			$name = $this->contLang->lc( $name );
		}
		if ( !( $flags & self::SFH_NO_HASH ) ) {
			# Add leading hash
			$name = '#' . $name;
		}
		if ( substr( $name, -1, 1 ) === ':' ) {
			# Remove trailing colon
			$name = substr( $name, 0, -1 );
		}
		$this->mFunctionSynonyms[$caseKey][$name] = $id;
	}

	return $previous;
}
5082
/**
 * Get the IDs of all registered function hooks.
 *
 * @return array Keys of the function hook table
 */
public function getFunctionHooks() {
	$hookIds = array_keys( $this->mFunctionHooks );

	return $hookIds;
}
5092
/**
 * Replace link placeholders in the given text, in place.
 *
 * @param string &$text Text that is modified by reference
 */
public function replaceLinkHolders( &$text ) {
	// Same operation as replaceLinkHoldersPrivate(), performed directly
	$this->mLinkHolders->replace( $text );
}
5103
/**
 * Replace link placeholders in the given text, in place, using the parser's
 * current link holder collection.
 *
 * @param string &$text Text that is modified by reference
 */
private function replaceLinkHoldersPrivate( &$text ) {
	$holders = $this->mLinkHolders;
	$holders->replace( $text );
}
5113
/**
 * Pass the text through replaceText() of the parser's current link holder
 * collection and return the result.
 *
 * @param string $text
 * @return string
 */
private function replaceLinkHoldersText( $text ) {
	$replaced = $this->mLinkHolders->replaceText( $text );

	return $replaced;
}
5124
/**
 * Render an image gallery from gallery tag content.
 *
 * Each line of $text has the form "Title|param|param|caption". The gallery
 * object is configured from $params, every valid line is added as an image,
 * and the gallery HTML is returned after the AfterParserFetchFileAndTitle
 * hook has run.
 *
 * @param string $text Gallery body, one image per line
 * @param array $params Tag attributes. The keys 'mode', 'showfilename',
 *   'caption', 'perrow', 'widths' and 'heights' are read here; the whole
 *   array is also passed on as additional options.
 * @return string Gallery HTML
 */
public function renderImageGallery( $text, array $params ) {
	$mode = $params['mode'] ?? false;

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( ImageGalleryClassNotFoundException $e ) {
		// Unknown gallery mode requested: use the default gallery instead.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->getTitle() );
	$ig->setShowBytes( false );
	$ig->setShowDimensions( false );
	$ig->setShowFilename( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'ul' ) );

	// File names are shown only when the attribute is present
	$ig->setShowFilename( isset( $params['showfilename'] ) );

	if ( isset( $params['caption'] ) ) {
		// NOTE: We aren't passing a frame here or below. Frame info
		// is currently opaque to Parsoid, which acts on OT_PREPROCESS.
		// See T107332#4030581
		$ig->setCaptionHtml( $this->recursiveTagParse( $params['caption'] ) );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	$enableLegacyMediaDOM = $this->svcOptions->get( MainConfigNames::ParserEnableLegacyMediaDOM );

	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		# Lines look like this:
		#    Image:someimage.jpg|This is some image
		$matches = [];
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		if ( !$matches ) {
			# Empty or non-matching line
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( $title === null ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# The file's handler decides which parameters are available.
		# Note that a hook can override the file name and choose an entirely
		# different file (possibly of another type, with another handler).
		$options = [];
		$descQuery = false;
		$this->hookRunner->onBeforeParserFetchFileAndTitle(
			// @phan-suppress-next-line PhanTypeMismatchArgument Type mismatch on pass-by-ref args
			$this, $title, $options, $descQuery
		);
		# Don't register it now, as TraditionalImageGallery does that later.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		$paramMap = [
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		];
		if ( $handler ) {
			$paramMap += $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = $this->magicWordFactory->newArray( array_keys( $paramMap ) );

		$label = '';
		$alt = null;
		$handlerOptions = [];
		$imageOptions = [];
		$hasAlt = false;

		if ( isset( $matches[3] ) ) {
			// Look for an |alt= definition while trying not to break existing
			// captions that contain several pipes (|), until a more sensible
			// grammar is defined for images in galleries.

			// FIXME: Doing recursiveTagParse at this stage is a bit odd,
			// and different from makeImage.
			$matches[3] = $this->recursiveTagParse( $matches[3] );
			// Split on pipes, but protect LanguageConverter markup
			$parameterMatches = StringUtils::delimiterExplode(
				'-{', '}-',
				'|',
				$matches[3],
				true /* nested */
			);

			foreach ( $parameterMatches as $parameterMatch ) {
				[ $magicName, $match ] = $mwArray->matchVariableStartToEnd( trim( $parameterMatch ) );
				if ( !$magicName ) {
					// Not a known parameter, so it is the caption. Last pipe wins.
					$label = $parameterMatch;
					continue;
				}

				$paramName = $paramMap[$magicName];
				switch ( $paramName ) {
					case 'gallery-internal-alt':
						$hasAlt = true;
						$alt = $this->stripAltText( $match, false );
						break;
					case 'gallery-internal-link':
						$linkValue = $this->stripAltText( $match, false );
						if ( preg_match( '/^-{R\|(.*)}-$/', $linkValue ) ) {
							// Result of LanguageConverter::markNoConversion
							// invoked on an external link: drop the "-{R|" and "}-" wrapper.
							$linkValue = substr( $linkValue, 4, -2 );
						}
						[ $type, $target ] = $this->parseLinkParameter( $linkValue );
						if ( $type ) {
							if ( $type === 'no-link' ) {
								$target = true;
							}
							$imageOptions[$type] = $target;
						}
						break;
					default:
						// Must be a handler specific parameter.
						if ( $handler->validateParam( $paramName, $match ) ) {
							$handlerOptions[$paramName] = $match;
						} else {
							// Guess not, consider it as caption.
							$this->logger->debug(
								"$parameterMatch failed parameter validation" );
							$label = $parameterMatch;
						}
				}
			}
		}

		// Match makeImage when !$hasVisibleCaption
		if ( !$hasAlt ) {
			if ( $label !== '' ) {
				$alt = $this->stripAltText( $label, false );
			} elseif ( $enableLegacyMediaDOM ) {
				$alt = $title->getText();
			}
		}
		$imageOptions['title'] = $this->stripAltText( $label, false );

		// Match makeImage which sets this unconditionally
		$handlerOptions['targetlang'] = $this->getTargetLanguage()->getCode();

		$ig->add(
			$title, $label, $alt, '', $handlerOptions,
			ImageGalleryBase::LOADING_DEFAULT, $imageOptions
		);
	}

	$html = $ig->toHTML();
	$this->hookRunner->onAfterParserFetchFileAndTitle( $this, $ig, $html );

	return $html;
}
5320
/**
 * Get the image parameter map and matching magic word array for a handler.
 *
 * Results are cached per handler class; the empty string is the cache key
 * when there is no handler.
 *
 * @param MediaHandler|false $handler
 * @return array Two elements: the map from magic word ID to a
 *   [ parameter type, parameter name ] pair, and the magic word array built
 *   from the map's keys
 */
private function getImageParams( $handler ) {
	$handlerClass = $handler ? get_class( $handler ) : '';

	if ( isset( $this->mImageParams[$handlerClass] ) ) {
		return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
	}

	# Static lists, initialised once
	static $internalParamNames = [
		'horizAlign' => [ 'left', 'right', 'center', 'none' ],
		'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
			'bottom', 'text-bottom' ],
		'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
			'upright', 'border', 'link', 'alt', 'class' ],
	];
	static $internalParamMap;
	if ( !$internalParamMap ) {
		$internalParamMap = [];
		foreach ( $internalParamNames as $paramType => $paramNames ) {
			foreach ( $paramNames as $internalName ) {
				// For grep: img_left, img_right, img_center, img_none,
				// img_baseline, img_sub, img_super, img_top, img_text_top, img_middle,
				// img_bottom, img_text_bottom,
				// img_thumbnail, img_manualthumb, img_framed, img_frameless, img_upright,
				// img_border, img_link, img_alt, img_class
				$magicName = str_replace( '-', '_', "img_$internalName" );
				$internalParamMap[$magicName] = [ $paramType, $internalName ];
			}
		}
	}

	# Start from the internal parameters and add the handler's own
	$paramMap = $internalParamMap;
	if ( $handler ) {
		foreach ( $handler->getParamMap() as $magic => $paramName ) {
			$paramMap[$magic] = [ 'handler', $paramName ];
		}
	} else {
		// Parse the size for non-existent files. See T273013
		$paramMap[ 'img_width' ] = [ 'handler', 'width' ];
	}

	$this->mImageParams[$handlerClass] = $paramMap;
	$this->mImageParamsMagicArray[$handlerClass] =
		$this->magicWordFactory->newArray( array_keys( $paramMap ) );

	return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
}
5373
/**
 * Parse image options text and build the HTML for an image link.
 *
 * @param LinkTarget $link Target of the image
 * @param string $options Pipe-separated option text ("options|alt text")
 * @param object|false $holders Passed through to stripAltText(), which calls
 *   replaceText() on it when it is truthy
 * @return string HTML from Linker::makeImageLink(), after the
 *   ParserModifyImageHTML hook has run when the file exists
 */
public function makeImage( LinkTarget $link, $options, $holders = false ) {
	# The options text has the form "options|alt text". Options are:
	#  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
	#  * left       no resizing, just left align. label is used for alt= only
	#  * right      same, but right aligned
	#  * none       same, but not aligned
	#  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
	#  * center     center the image
	#  * framed     Keep original image size, no magnify-button.
	#  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
	#  * upright    reduce width for upright images, rounded to full __0 px
	#  * border     draw a 1px border around the image
	#  * alt        Text for HTML alt attribute (defaults to empty)
	#  * class      Set a class for img node
	#  * link       Set the target of the image link. Can be external, interwiki, or local
	# vertical-align values (no % or length right now):
	#  * baseline
	#  * sub
	#  * super
	#  * top
	#  * text-top
	#  * middle
	#  * bottom
	#  * text-bottom

	# Split on pipes while protecting LanguageConverter markup
	$parts = StringUtils::delimiterExplode(
		'-{', '}-', '|', $options, true /* allow nesting */
	);

	# Give extensions a chance to select the file revision for us
	$options = [];
	$descQuery = false;
	$title = Title::castFromLinkTarget( $link ); // hook signature compat
	$this->hookRunner->onBeforeParserFetchFileAndTitle(
		// @phan-suppress-next-line PhanTypeMismatchArgument Type mismatch on pass-by-ref args
		$this, $title, $options, $descQuery
	);
	# Fetch and register the file (file title may be different via hooks)
	[ $file, $link ] = $this->fetchFileAndTitle( $link, $options );

	# Get parameter map
	$handler = $file ? $file->getHandler() : false;
	[ $paramMap, $mwArray ] = $this->getImageParams( $handler );

	if ( !$file ) {
		$this->addTrackingCategory( 'broken-file-category' );
	}

	# Process the input parameters
	$caption = '';
	$params = [
		'frame' => [],
		'handler' => [],
		'horizAlign' => [],
		'vertAlign' => [],
	];
	$seenformat = false;
	foreach ( $parts as $part ) {
		[ $magicName, $value ] = $mwArray->matchVariableStartToEnd( trim( $part ) );
		$validated = false;
		if ( isset( $paramMap[$magicName] ) ) {
			[ $type, $paramName ] = $paramMap[$magicName];

			if ( $type === 'handler' && $paramName === 'width' ) {
				# Special case; width and height come in one variable together
				$parsedWidthParam = self::parseWidthParam( $value );
				// Parsoid applies data-(width|height) attributes to broken
				// media spans, for client use. See T273013
				$validateFunc = static fn ( $name, $value ) => $handler
					? $handler->validateParam( $name, $value )
					: $value > 0;
				foreach ( [ 'width', 'height' ] as $dimension ) {
					if (
						isset( $parsedWidthParam[$dimension] ) &&
						$validateFunc( $dimension, $parsedWidthParam[$dimension] )
					) {
						$params[$type][$dimension] = $parsedWidthParam[$dimension];
						$validated = true;
					}
				}
				# else no validation -- T15436
			} else {
				if ( $type === 'handler' ) {
					# Validate handler parameter
					$validated = $handler->validateParam( $paramName, $value );
				} else {
					# Validate internal parameters
					switch ( $paramName ) {
						case 'alt':
						case 'class':
							$validated = true;
							$value = $this->stripAltText( $value, $holders );
							break;
						case 'link':
							# parseLinkParameter() returns the frame key to use
							# together with the link target
							[ $paramName, $value ] =
								$this->parseLinkParameter(
									$this->stripAltText( $value, $holders )
								);
							if ( $paramName ) {
								$validated = true;
								if ( $paramName === 'no-link' ) {
									$value = true;
								}
							}
							break;
						case 'manualthumb':
							# @todo FIXME: Possibly check validity here for
							# manualthumb? downstream behavior seems odd with
							# missing manual thumbs.
							$value = $this->stripAltText( $value, $holders );
							// fall through
						case 'frameless':
						case 'framed':
						case 'thumbnail':
							// use first appearing option, discard others.
							$validated = !$seenformat;
							$seenformat = true;
							break;
						default:
							# Most other things appear to be empty or numeric...
							$validated = ( $value === false || is_numeric( trim( $value ) ) );
					}
				}

				if ( $validated ) {
					$params[$type][$paramName] = $value;
				}
			}
		}
		if ( !$validated ) {
			# Anything that is not a valid parameter becomes the caption
			$caption = $part;
		}
	}

	# Process alignment parameters: the first one given wins
	if ( $params['horizAlign'] !== [] ) {
		$params['frame']['align'] = array_key_first( $params['horizAlign'] );
	}
	if ( $params['vertAlign'] !== [] ) {
		$params['frame']['valign'] = array_key_first( $params['vertAlign'] );
	}

	$params['frame']['caption'] = $caption;

	$enableLegacyMediaDOM = $this->svcOptions->get( MainConfigNames::ParserEnableLegacyMediaDOM );

	# Will the image be presented in a frame, with the caption below?
	// @phan-suppress-next-line PhanImpossibleCondition
	$hasVisibleCaption = isset( $params['frame']['framed'] )
		// @phan-suppress-next-line PhanImpossibleCondition
		|| isset( $params['frame']['thumbnail'] )
		// @phan-suppress-next-line PhanImpossibleCondition
		|| isset( $params['frame']['manualthumb'] );

	# In the old days, [[Image:Foo|text...]] would set alt text.  Later it
	# came to also set the caption, ordinary text after the image -- which
	# makes no sense, because that just repeats the text multiple times in
	# screen readers.  It *also* came to set the title attribute.
	# Now that we have an alt attribute, we should not set the alt text to
	# equal the caption: that's worse than useless, it just repeats the
	# text.  This is the framed/thumbnail case.  If there's no caption, we
	# use the unnamed parameter for alt text as well, just for the time
	# being, if the unnamed param is set and the alt param is not.
	# For the future, we need to figure out if we want to tweak this more,
	# e.g., introducing a title= parameter for the title; ignoring the
	# unnamed parameter entirely for images without a caption; adding an
	# explicit caption= parameter and preserving the old magic unnamed
	# parameter for BC; ...
	if ( $hasVisibleCaption ) {
		if (
			// @phan-suppress-next-line PhanImpossibleCondition
			$caption === '' && !isset( $params['frame']['alt'] ) &&
			$enableLegacyMediaDOM
		) {
			# No caption or alt text, add the filename as the alt text so
			# that screen readers at least get some description of the image
			$params['frame']['alt'] = $link->getText();
		}
		# Do not set $params['frame']['title'] because tooltips are unnecessary
		# for framed images, the caption is visible
	} else {
		// @phan-suppress-next-line PhanImpossibleCondition
		if ( !isset( $params['frame']['alt'] ) ) {
			# No alt text, use the "caption" for the alt text
			if ( $caption !== '' ) {
				$params['frame']['alt'] = $this->stripAltText( $caption, $holders );
			} elseif ( $enableLegacyMediaDOM ) {
				# No caption, fall back to using the filename for the
				# alt text
				$params['frame']['alt'] = $link->getText();
			}
		}
		# Use the "caption" for the tooltip text
		$params['frame']['title'] = $this->stripAltText( $caption, $holders );
	}
	$params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();

	// hook signature compat again, $link may have changed
	$title = Title::castFromLinkTarget( $link );
	$this->hookRunner->onParserMakeImageParams( $title, $file, $params, $this );

	# Linker does the rest
	$time = $options['time'] ?? false;
	// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset
	$ret = Linker::makeImageLink( $this, $link, $file, $params['frame'], $params['handler'],
		$time, $descQuery, $this->mOptions->getThumbSize() );

	# Give the handler a chance to modify the parser object
	if ( $handler ) {
		$handler->parserTransformHook( $this, $file );
	}
	if ( $file ) {
		$this->modifyImageHtml( $file, $params, $ret );
	}

	return $ret;
}
5606
/**
 * Parse the value of a 'link' image parameter.
 *
 * Valid external URLs and valid titles are also registered on the parser output.
 *
 * @param string $value Link value, already passed through stripAltText()
 * @return array Two elements, type and target:
 *   - [ 'no-link', false ] for an empty value
 *   - [ 'link-url', string ] for a valid external URL
 *   - [ 'link-title', Title ] for a valid title
 *   - [ null, false ] when the value is neither
 */
private function parseLinkParameter( $value ) {
	if ( $value === '' ) {
		return [ 'no-link', false ];
	}

	$urlChars = self::EXT_LINK_URL_CLASS;
	$urlAddr = self::EXT_LINK_ADDR;
	$protocols = $this->urlUtils->validProtocols();

	if ( preg_match( "/^((?i)$protocols)/", $value ) ) {
		// Starts with a known protocol, so it has to be a complete valid URL
		if ( !preg_match( "/^((?i)$protocols)$urlAddr$urlChars*$/u", $value ) ) {
			return [ null, false ];
		}
		$this->mOutput->addExternalLink( $value );
		return [ 'link-url', $value ];
	}

	// Percent-decode link arguments for consistency with wikilink
	// handling (T216003#7836261).
	//
	// There's slight concern here though.  The |link= option supports
	// two formats, link=Test%22test vs link=[[Test%22test]], both of
	// which are about to be decoded.
	//
	// In the former case, the decoding here is straightforward and
	// desirable.
	//
	// In the latter case, there's a potential for double decoding,
	// because the wikilink syntax has a higher precedence and has
	// already been parsed as a link before we get here.  $value
	// has had stripAltText() called on it, which in turn calls
	// replaceLinkHoldersText() on the link.  So, the text we're
	// getting at this point has already been percent decoded.
	//
	// The problematic case is if %25 is in the title, since that
	// decodes to %, which could combine with trailing characters.
	// However, % is not a valid link title character, so it would
	// not parse as a link and the string we received here would
	// still contain the encoded %25.
	//
	// Hence, double decoding is not an issue.  See the test,
	// "Should not double decode the link option"
	if ( strpos( $value, '%' ) !== false ) {
		$value = rawurldecode( $value );
	}

	$linkTitle = Title::newFromText( $value );
	if ( !$linkTitle ) {
		return [ null, false ];
	}
	$this->mOutput->addLink( $linkTitle );

	return [ 'link-title', $linkTitle ];
}
5677
/**
 * Run the ParserModifyImageHTML hook for an image.
 *
 * @param File $file
 * @param array $params Image parameters, forwarded to the hook
 * @param string &$html Image HTML, passed to the hook by reference
 */
public function modifyImageHtml( File $file, array $params, string &$html ) {
	$runner = $this->hookRunner;
	$runner->onParserModifyImageHTML( $this, $file, $params, $html );
}
5688
/**
 * Reduce caption wikitext to plain text for use in alt and title attributes.
 *
 * Link placeholders are replaced by their text, strip markers are expanded,
 * ampersands that start a legacy semicolon-less entity are encoded, and
 * finally all tags are stripped.
 *
 * @param string $caption
 * @param object|false $holders When truthy, its replaceText() is used
 *   instead of the parser's own link holder collection
 * @return string
 */
private function stripAltText( $caption, $holders ) {
	# Strip bad stuff out of the title (tooltip).  We can't just use
	# replaceLinkHoldersText() here, because if this function is called
	# from handleInternalLinks2(), mLinkHolders won't be up-to-date.
	$plain = $holders
		? $holders->replaceText( $caption )
		: $this->replaceLinkHoldersText( $caption );

	# make sure there are no placeholders in thumbnail attributes
	# that are later expanded to html- so expand them now and
	# remove the tags
	$plain = $this->mStripState->unstripBoth( $plain );

	# Compatibility hack!  In HTML certain entity references not terminated
	# by a semicolon are decoded (but not if we're in an attribute; that's
	# how link URLs get away without properly escaping & in queries).
	# But wikitext has always required semicolon-termination of entities,
	# so encode & where needed to avoid decode of semicolon-less entities.
	# See T209236 and
	# https://www.w3.org/TR/html5/syntax.html#named-character-references
	# T210437 discusses moving this workaround to Sanitizer::stripAllTags.
	$plain = preg_replace( "/
		& # 1. entity prefix
		(?= # 2. followed by:
		(?: # a. one of the legacy semicolon-less named entities
			A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
			C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
			GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
			O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
			U(?:acute|circ|grave|uml)|Yacute|
			a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
			c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
			divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
			frac(?:1(?:2|4)|34)|
			gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
			i(?:acute|circ|excl|grave|quest|uml)|laquo|
			lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
			m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
			not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
			o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
			p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
			s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
			u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
		)
		(?:[^;]|$)) # b. and not followed by a semicolon
		# S = study, for efficiency
		/Sx", '&amp;', $plain );

	return Sanitizer::stripAllTags( $plain );
}
5746
/**
 * Expand variables and strip markers in attribute text.
 *
 * Emits a deprecation notice for version 1.35 on every call.
 *
 * @param string &$text Modified by reference and also returned
 * @param PPFrame|false $frame Forwarded to replaceVariables()
 * @return string The processed text
 */
public function attributeStripCallback( &$text, $frame = false ) {
	wfDeprecated( __METHOD__, '1.35' );

	# Expand variables first, then put stripped content back
	$text = $this->replaceVariables( $text, $frame );
	$text = $this->mStripState->unstripBoth( $text );

	return $text;
}
5762
/**
 * Get the names of all tags that have a registered hook.
 *
 * @return array Keys of the tag hook table
 */
public function getTags(): array {
	$tagNames = array_keys( $this->mTagHooks );

	return $tagNames;
}
5772
/**
 * Get the function synonym table.
 *
 * @return array Two-level map: case-sensitivity key (0 or 1), then
 *   synonym, then magic word ID, as filled by setFunctionHook()
 */
public function getFunctionSynonyms() {
	$synonyms = $this->mFunctionSynonyms;

	return $synonyms;
}
5780
/**
 * Get the valid URL protocols as reported by the URL utilities service.
 *
 * @return string Value of validProtocols(); elsewhere in this class it is
 *   embedded into regular expressions
 */
public function getUrlProtocols() {
	$urlUtils = $this->urlUtils;

	return $urlUtils->validProtocols();
}
5788
/**
 * Get or replace a section of wikitext, including its nested subsections.
 *
 * Locks the parser and starts a new OT_PLAIN parse with options built from
 * the main request context's user.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Section index, optionally prefixed by flags
 *   separated with '-'. The flag 'T' makes the preprocessor work in
 *   inclusion mode. A non-numeric identifier such as 'new' counts as section 0.
 * @param string $mode Either 'get' or 'replace'
 * @param string $newText In 'replace' mode, the replacement text for the
 *   section; in 'get' mode, the value returned when the section is not found
 * @param ?PageReference $page Page to start the parse with
 * @return string In 'get' mode, the section text (or $newText when the
 *   section does not exist); in 'replace' mode, the modified page text (or
 *   $text unchanged when the section does not exist)
 */
private function extractSections( $text, $sectionId, $mode, $newText, ?PageReference $page = null ) {
	# Keep the lock handle in a local for the rest of this call
	$magicScopeVariable = $this->lock();
	$this->startParse(
		$page,
		ParserOptions::newFromUser( RequestContext::getMain()->getUser() ),
		self::OT_PLAIN,
		true
	);
	$outText = '';
	$frame = $this->getPreprocessor()->newFrame();

	# Process section extraction flags
	$sectionParts = explode( '-', $sectionId );
	// The section ID may either be a magic string such as 'new' (which should be treated as 0),
	// or a numbered section ID in the format of "T-<section index>".
	// Explicitly coerce the section index into a number accordingly. (T323373)
	$sectionIndex = (int)array_pop( $sectionParts );
	# Whatever is left in front of the index is a list of flags
	$flags = in_array( 'T', $sectionParts, true )
		? Preprocessor::DOM_FOR_INCLUSION
		: 0;

	# Check for empty input
	if ( strval( $text ) === '' ) {
		# Only sections 0 and T-0 exist in an empty document
		if ( $sectionIndex === 0 ) {
			return $mode === 'get' ? '' : $newText;
		}
		return $mode === 'get' ? $newText : $text;
	}

	# Preprocess the text
	$root = $this->preprocessToDom( $text, $flags );

	# <h> nodes indicate section breaks
	# They can only occur at the top level, so we can find them by iterating the root's children
	$node = $root->getFirstChild();

	# Find the target section
	if ( $sectionIndex === 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		for ( ; $node; $node = $node->getNextSibling() ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					$targetLevel = $bits['level'];
					break;
				}
			}
			if ( $mode === 'replace' ) {
				# Everything before the target section is kept verbatim
				$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
		}
	}

	if ( !$node ) {
		# Not found
		return $mode === 'get' ? $newText : $text;
	}

	# Find the end of the section, including nested sections
	do {
		if ( $node->getName() === 'h' ) {
			$bits = $node->splitHeading();
			$curLevel = $bits['level'];
			// @phan-suppress-next-line PhanPossiblyUndeclaredVariable False positive
			if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
				# A different heading at the same or a higher level ends the section
				break;
			}
		}
		if ( $mode === 'get' ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
		$node = $node->getNextSibling();
	} while ( $node );

	# Write out the remainder (in replace mode only)
	if ( $mode === 'replace' ) {
		# Output the replacement text
		# Add two newlines on -- trailing whitespace in $newText is conventionally
		# stripped by the editor, so we need both newlines to restore the paragraph gap
		# Only add trailing whitespace if there is newText
		if ( $newText != "" ) {
			$outText .= $newText . "\n\n";
		}

		for ( ; $node; $node = $node->getNextSibling() ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	# Re-insert stripped tags and drop trailing whitespace
	return rtrim( $this->mStripState->unstripBoth( $outText ) );
}
5934
/**
 * Get the wikitext of one section, including its nested subsections.
 *
 * Thin wrapper around extractSections() in 'get' mode.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId Section identifier, see extractSections()
 * @param string $defaultText Returned when the section does not exist
 * @return string
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	$section = $this->extractSections( $text, $sectionId, 'get', $defaultText );

	return $section;
}
5953
/**
 * Replace one section of the page, including its nested subsections.
 *
 * Thin wrapper around extractSections() in 'replace' mode.
 *
 * @param string $oldText Current page wikitext
 * @param string|int $sectionId Section identifier, see extractSections()
 * @param string $newText Replacement text for the section
 * @return string Modified page text
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$updated = $this->extractSections( $oldText, $sectionId, 'replace', $newText );

	return $updated;
}
5970
/**
 * Split wikitext into a flat list of sections.
 *
 * Locks the parser and starts a new OT_PLAIN parse without a page. The first
 * entry is always the part before the first heading (index 0, level 0), even
 * when it is empty.
 *
 * @param string $text Page wikitext
 * @return array[] One array per section with the keys:
 *   - 'index': section index as reported by the heading node (0 for the lead)
 *   - 'level': heading level (0 for the lead)
 *   - 'offset': byte offset at which the section starts in the recovered text
 *   - 'heading': original text of the heading node ('' for the lead)
 *   - 'text': original text of the section, heading included
 */
public function getFlatSectionInfo( $text ) {
	# Keep the lock handle in a local for the rest of this call
	$magicScopeVariable = $this->lock();
	$this->startParse(
		null,
		ParserOptions::newFromUser( RequestContext::getMain()->getUser() ),
		self::OT_PLAIN,
		true
	);
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $text, 0 );

	$sections = [];
	$offset = 0;
	$current = [
		'index' => 0,
		'level' => 0,
		'offset' => 0,
		'heading' => '',
		'text' => ''
	];

	for ( $node = $root->getFirstChild(); $node; $node = $node->getNextSibling() ) {
		$nodeText = $frame->expand( $node, PPFrame::RECOVER_ORIG );

		if ( $node->getName() !== 'h' ) {
			$current['text'] .= $nodeText;
		} else {
			# A heading closes the running section and opens a new one
			$bits = $node->splitHeading();
			$sections[] = $current;
			$current = [
				'index' => $bits['i'],
				'level' => $bits['level'],
				'offset' => $offset,
				'heading' => $nodeText,
				'text' => $nodeText
			];
		}

		$offset += strlen( $nodeText );
	}
	$sections[] = $current;

	return $sections;
}
6042
/**
 * Get the revision ID currently stored on the parser.
 *
 * @return int|null Null when no revision ID is set; elsewhere in this class
 *   a null ID is treated as preview mode
 */
public function getRevisionId() {
	$revId = $this->mRevisionId;

	return $revId;
}
6057
/**
 * Get the revision record for the revision being parsed.
 *
 * The result is cached on the parser once a record has been found.
 *
 * @return RevisionRecord|null Null when the callback reports no revision,
 *   when an existing revision is returned during a preview, or when the
 *   lookup by revision ID finds nothing
 */
public function getRevisionRecordObject() {
	if ( $this->mRevisionRecordObject ) {
		return $this->mRevisionRecordObject;
	}

	// NOTE: try to get the RevisionRecord object even if mRevisionId is null.
	// This is useful when parsing a revision that has not yet been saved.
	// However, if we get back a saved revision even though we are in
	// preview mode, we'll have to ignore it, see below.
	// NOTE: This callback may be used to inject an OLD revision that was
	// already loaded, so "current" is a bit of a misnomer. We can't just
	// skip it if mRevisionId is set.
	$fetchRevision = $this->mOptions->getCurrentRevisionRecordCallback();
	$rev = $fetchRevision( $this->getTitle(), $this );

	if ( !$rev ) {
		// The revision record callback returns `false` (not null) to
		// indicate that the revision is missing.  (See for example
		// Parser::statelessFetchRevisionRecord(), the default callback.)
		// This API expects `null` instead. (T251952)
		return null;
	}

	$inPreview = $this->mRevisionId === null;
	if ( $inPreview && $rev->getId() ) {
		// We are in preview mode (mRevisionId is null), and the current revision callback
		// returned an existing revision. Ignore it and return null, it's probably the page's
		// current revision, which is not what we want here. Note that we do want to call the
		// callback to allow the unsaved revision to be injected here, e.g. for
		// self-transclusion previews.
		return null;
	}

	// If the parse is for a new revision, then the callback should have
	// already been set to force the object and should match mRevisionId.
	// If not, try to fetch by mRevisionId instead.
	if ( $this->mRevisionId && $rev->getId() != $this->mRevisionId ) {
		$rev = MediaWikiServices::getInstance()
			->getRevisionLookup()
			->getRevisionById( $this->mRevisionId );
	}

	$this->mRevisionRecordObject = $rev;

	return $rev;
}
6112
/**
 * Get the timestamp associated with the current revision, adjusted for
 * the site-default timezone offset.
 *
 * The adjusted value is cached in $this->mRevisionTimestamp, and the
 * unadjusted timestamp is recorded on the parser output.
 *
 * @return string The adjusted timestamp
 */
public function getRevisionTimestamp() {
	if ( $this->mRevisionTimestamp !== null ) {
		return $this->mRevisionTimestamp;
	}

	# Prefer the timestamp of the revision being parsed; fall back to the
	# timestamp carried by the parser options.
	$revObject = $this->getRevisionRecordObject();
	if ( $revObject && $revObject->getTimestamp() ) {
		$rawTimestamp = $revObject->getTimestamp();
	} else {
		$rawTimestamp = $this->mOptions->getTimestamp();
	}
	# Recorded before any timezone adjustment is applied.
	$this->mOutput->setRevisionTimestampUsed( $rawTimestamp );

	# The empty-string timezone argument selects the site-default offset
	# rather than the user's preference. The value ends up in the parser
	# cache, is served to other users and may even be used inside links,
	# so it has to be identical for every visitor.
	$this->mRevisionTimestamp = $this->contLang->userAdjust( $rawTimestamp, '' );

	return $this->mRevisionTimestamp;
}
6140
/**
 * Get the name of the user that edited the last revision.
 *
 * The name is cached in $this->mRevisionUser once it has been determined.
 *
 * @return string|null The user name, or null when there is no revision
 *   user and the parse is neither a pre-save transform nor a preview
 */
public function getRevisionUser(): ?string {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$revObject = $this->getRevisionRecordObject();

	# When this is being subst:'ed the revision id is blank, so the
	# current user's name is used instead.
	if ( $revObject && $revObject->getUser() ) {
		$this->mRevisionUser = $revObject->getUser()->getName();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		$this->mRevisionUser = $this->getUserIdentity()->getName();
	}

	# If neither branch applied, mRevisionUser is still null here.
	return $this->mRevisionUser;
}
6164
/**
 * Get the size of the revision.
 *
 * The value is cached in $this->mRevisionSize after the first call.
 *
 * @return int|null The revision's size, or the parser input size when no
 *   revision record is available
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize === null ) {
		$revObject = $this->getRevisionRecordObject();

		# When this variable is subst:'ed the revision id is blank; fall
		# back to the parser input size, since the substitution itself
		# changes the size.
		$this->mRevisionSize = $revObject
			? $revObject->getSize()
			: $this->mInputSize;
	}

	return $this->mRevisionSize;
}
6186
/**
 * Accessor for the 'defaultsort' page property.
 *
 * @deprecated since 1.38 (emits a deprecation warning when called)
 * @return string The 'defaultsort' page property, or '' when it is unset
 */
public function getDefaultSort() {
	wfDeprecated( __METHOD__, '1.38' );
	$sortKey = $this->mOutput->getPageProperty( 'defaultsort' );

	return $sortKey ?? '';
}
6204
/**
 * Turn already-stripped heading text into a normalized section name.
 *
 * Applies, in order: section-name whitespace normalization, character
 * reference decoding, and the title-fragment normalization performed by
 * normalizeSectionName().
 *
 * @param string $text Stripped heading text
 * @return string The normalized section name
 */
private static function getSectionNameFromStrippedText( $text ) {
	$collapsed = Sanitizer::normalizeSectionNameWhitespace( $text );
	$decoded = Sanitizer::decodeCharReferences( $collapsed );

	return self::normalizeSectionName( $decoded );
}
6211
/**
 * Build a link fragment ('#' followed by the escaped ID) for a section name.
 *
 * @param string $sectionName
 * @return string The anchor, including the leading '#'
 */
private static function makeAnchor( $sectionName ) {
	$id = Sanitizer::escapeIdForLink( $sectionName );

	return '#' . $id;
}
6215
/**
 * Build a link fragment for a section name, using the fallback ID
 * encoding when the second FragmentMode entry is 'legacy'.
 *
 * @param string $sectionName
 * @return string The anchor, including the leading '#'
 */
private function makeLegacyAnchor( $sectionName ) {
	$fragmentMode = $this->svcOptions->get( MainConfigNames::FragmentMode );

	if ( ( $fragmentMode[1] ?? null ) === 'legacy' ) {
		// ForAttribute() and ForLink() are the same for legacy encoding
		return '#' . Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK );
	}

	return '#' . Sanitizer::escapeIdForLink( $sectionName );
}
6227
/**
 * Try to guess the section anchor name based on a wikitext fragment,
 * presumably extracted from a heading.
 *
 * @param string $text Wikitext fragment
 * @return string Anchor, including the leading '#'
 */
public function guessSectionNameFromWikiText( $text ) {
	# Wikitext links have to go first, since they break the anchor
	$stripped = $this->stripSectionName( $text );

	return self::guessSectionNameFromStrippedText( $stripped );
}
6243
/**
 * Same as guessSectionNameFromWikiText(), but produces legacy anchors
 * instead, if possible.
 *
 * @param string $text Wikitext fragment
 * @return string Anchor, including the leading '#'
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Wikitext links have to go first, since they break the anchor
	$stripped = $this->stripSectionName( $text );

	return $this->makeLegacyAnchor(
		self::getSectionNameFromStrippedText( $stripped )
	);
}
6260
/**
 * Like guessSectionNameFromWikiText(), but takes already-stripped text
 * as input.
 *
 * @param string $text Section name with wikitext markup already removed
 * @return string Anchor, including the leading '#'
 */
public static function guessSectionNameFromStrippedText( $text ) {
	return self::makeAnchor( self::getSectionNameFromStrippedText( $text ) );
}
6271
/**
 * Apply title-fragment normalization to a section name.
 *
 * The name is parsed as the fragment of a title string ("#name"); if
 * that parse fails the input is returned unchanged.
 *
 * @param string $text Section name
 * @return string Normalized section name
 */
private static function normalizeSectionName( $text ) {
	# T90902: IDs must receive the same normalization that links do
	$titleParser = MediaWikiServices::getInstance()->getTitleParser();
	'@phan-var MediaWikiTitleCodec $titleParser';

	try {
		return $titleParser->splitTitleString( "#$text" )['fragment'];
	} catch ( MalformedTitleException $ex ) {
		// Not parseable as a title fragment: keep the text as it is
		return $text;
	}
}
6291
/**
 * Strips a text string of wikitext for use in a section anchor.
 *
 * Removes internal and external link markup (keeping the link text),
 * processes wikitext quotes via doQuotes(), and drops anything between
 * '<' and '>'.
 *
 * @param string $text Wikitext to strip
 * @return string The stripped text
 */
public function stripSectionName( $text ) {
	# Internal links: piped form first (keep the label), then plain form
	# (keep the target)
	$noPipedLinks = preg_replace( '/\[\[:?([^[|]+)\|([^[]+)\]\]/', '$2', $text );
	$noInternalLinks = preg_replace( '/\[\[:?([^[]+)\|?\]\]/', '$1', $noPipedLinks );

	# External links: keep only the link text
	# @todo FIXME: Blank link text is not handled. For instance
	# [https://www.mediawiki.org] renders as [1] or similar, depending on
	# how many empty links the page has - that still needs working out.
	$externalLinkRegex = '/\[(?i:' . $this->urlUtils->validProtocols() . ')([^ ]+?) ([^[]+)\]/';
	$noLinks = preg_replace( $externalLinkRegex, '$2', $noInternalLinks );

	# Wikitext quotes (italics & bold)
	$quotesDone = $this->doQuotes( $noLinks );

	# HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $quotesDone );
}
6326
/**
 * Call a callback function on all regions of the given text that are not
 * inside strip markers, and replace those regions with the return value
 * of the callback. Strip markers themselves are copied through untouched.
 *
 * @param string $s Text to process
 * @param callable $callback Receives each non-marker region as its only
 *   argument; its return value is appended to the output
 * @return string The processed text
 */
public function markerSkipCallback( $s, callable $callback ) {
	$result = '';
	$length = strlen( $s );
	$suffixLength = strlen( self::MARKER_SUFFIX );

	for ( $pos = 0; $pos < $length; ) {
		$start = strpos( $s, self::MARKER_PREFIX, $pos );
		if ( $start === false ) {
			// No further markers: the whole remainder is ordinary text
			return $result . call_user_func( $callback, substr( $s, $pos ) );
		}

		// Ordinary text in front of the marker (may be the empty string)
		$result .= call_user_func( $callback, substr( $s, $pos, $start - $pos ) );

		$end = strpos( $s, self::MARKER_SUFFIX, $start );
		if ( $end === false ) {
			// Unterminated marker: copy the rest through verbatim
			return $result . substr( $s, $start );
		}

		// Copy the complete marker and resume right behind it
		$end += $suffixLength;
		$result .= substr( $s, $start, $end - $start );
		$pos = $end;
	}

	return $result;
}
6368
/**
 * Remove any strip markers found in the given text.
 *
 * Delegates to the killMarkers() method of the parser's strip state.
 *
 * @param string $text
 * @return string
 */
public function killMarkers( $text ) {
	$stripState = $this->mStripState;

	return $stripState->killMarkers( $text );
}
6379
/**
 * Parse a width param of an image link, like 300px or 200x300px.
 *
 * (T15500) In both forms (width/height and width only) a trailing "px"
 * is permitted for backward compatibility.
 *
 * @param string $value
 * @param bool $parseHeight Whether the "WIDTHxHEIGHT" form is recognised
 * @return array Empty when $value is '' or matches neither form;
 *   otherwise contains 'width' and, for the two-dimensional form, 'height'
 */
public static function parseWidthParam( $value, $parseHeight = true ) {
	if ( $value === '' ) {
		return [];
	}

	$dims = [];
	if ( $parseHeight && preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $dims ) ) {
		return [
			'width' => intval( $dims[1] ),
			'height' => intval( $dims[2] ),
		];
	}

	if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
		// intval() reads the leading digits and ignores the "px" suffix
		return [ 'width' => intval( $value ) ];
	}

	return [];
}
6409
/**
 * Lock the current instance of the parser.
 *
 * @return ScopedCallback Resets $this->mInParse to false when its
 *   callback runs
 * @throws LogicException If the lock is already held
 */
protected function lock() {
	if ( $this->mInParse ) {
		throw new LogicException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively? Lock is held by: " . $this->mInParse );
	}

	// Remember the backtrace of whoever takes the lock, so that a second
	// locking attempt can report the lock owner for easier debugging
	$this->mInParse = ( new RuntimeException )->getTraceAsString();

	return new ScopedCallback( function () {
		$this->mInParse = false;
	} );
}
6435
/**
 * Will entry points such as parse() throw an exception due to the parser
 * already being active?
 *
 * @return bool True while $this->mInParse holds a truthy value
 */
public function isLocked() {
	$lockHolder = $this->mInParse;

	return (bool)$lockHolder;
}
6446
/**
 * Strip the outer <p></p> from HTML that consists of a single paragraph.
 *
 * The input is returned unchanged when it is not wrapped in <p>...</p>,
 * or when the wrapped content itself contains a '</p>'.
 *
 * @param string $html
 * @return string
 */
public static function stripOuterParagraph( $html ) {
	$match = [];
	$isSingleParagraph = preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $match )
		&& strpos( $match[1], '</p>' ) === false;

	return $isSingleParagraph ? $match[1] : $html;
}
6465
/**
 * Add HTML tags marking the parts of a page title, to be displayed in the
 * first heading of the page.
 *
 * Each argument is converted with HtmlArmor::getHtml() before being
 * wrapped in its <span>.
 *
 * @param string|HtmlArmor $nsText Namespace part; when it is '' the
 *   namespace and separator spans are omitted
 * @param string|HtmlArmor $nsSeparator Separator between namespace and main part
 * @param string|HtmlArmor $mainText Main part of the title
 * @return string HTML
 */
public static function formatPageTitle( $nsText, $nsSeparator, $mainText ): string {
	$spans = [];

	if ( $nsText !== '' ) {
		$spans[] = '<span class="mw-page-title-namespace">' . HtmlArmor::getHtml( $nsText ) . '</span>';
		$spans[] = '<span class="mw-page-title-separator">' . HtmlArmor::getHtml( $nsSeparator ) . '</span>';
	}
	$spans[] = '<span class="mw-page-title-main">' . HtmlArmor::getHtml( $mainText ) . '</span>';

	return implode( '', $spans );
}
6485
/**
 * Strip everything but the <body> from the provided string.
 *
 * Removes everything up to and including the first opening <body ...>
 * tag, and a trailing "</body></html>" (optionally followed by
 * whitespace). Text without those delimiters is returned unchanged.
 *
 * @param string $text HTML document (or fragment)
 * @return string The body content
 */
public static function extractBody( string $text ): string {
	// preg_replace() returns null on a PCRE error (for instance when the
	// backtrack limit is hit on a very large document). Keep the last good
	// text in that case rather than passing null on and violating the
	// declared string return type.
	$withoutHead = preg_replace( '!^.*?<body[^>]*>!s', '', $text, 1 ) ?? $text;

	return preg_replace( '!</body>\s*</html>\s*$!', '', $withoutHead, 1 ) ?? $withoutHead;
}
6497
/**
 * Sets up the PHP implementation of OOUI for use in this request and
 * flags the parser output as having OOUI enabled.
 *
 * @deprecated since 1.35 (emits a deprecation warning when called)
 */
public function enableOOUI() {
	wfDeprecated( __METHOD__, '1.35' );

	// Request-level setup first, then mark the parser output
	OutputPage::setupOOUI();
	$this->mOutput->setEnableOOUI( true );
}
6510
/**
 * Set an output flag on the parser output and write a debug log entry
 * naming the flag, the page being parsed and the reason.
 *
 * @param string $flag Name of the flag to set
 * @param string $reason Explanation, used only in the log message
 */
private function setOutputFlag( string $flag, string $reason ): void {
	$this->mOutput->setOutputFlag( $flag );

	$name = $this->getTitle()->getPrefixedText();
	$message = __METHOD__ . ": set $flag flag on '$name'; $reason";
	$this->logger->debug( $message );
}
6522}
6523
// Register the un-namespaced name 'Parser' as an alias of this class.
class_alias( Parser::class, 'Parser' );
getUser()
const OT_WIKI
Definition Defines.php:159
const NS_FILE
Definition Defines.php:71
const NS_MEDIAWIKI
Definition Defines.php:73
const NS_TEMPLATE
Definition Defines.php:75
const NS_SPECIAL
Definition Defines.php:54
const OT_PLAIN
Definition Defines.php:161
const OT_PREPROCESS
Definition Defines.php:160
const OT_HTML
Definition Defines.php:158
const NS_MEDIA
Definition Defines.php:53
const NS_CATEGORY
Definition Defines.php:79
wfEscapeWikiText( $input)
Escapes the given text so that it may be output using addWikiText() without any linking,...
wfDeprecatedMsg( $msg, $version=false, $component=false, $callerOffset=2)
Log a deprecation warning with arbitrary message text.
wfHostname()
Get host name of the current machine, for use in error reporting.
wfSetVar(&$dest, $source, $force=false)
Sets dest to source and returns the original value of dest. If source is NULL, it just returns the val...
wfMatchesDomainList( $url, $domains)
Check whether a given URL has a domain that occurs in a given set of domains.
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
Title null $mTitle
array $params
The job parameters.
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:81
Expansions of core magic variables, used by the parser.
Various core parser functions, registered in every Parser.
Various tag hooks, registered in every Parser.
static register(Parser $parser, ServiceOptions $options)
const REGISTER_OPTIONS
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition File.php:74
Marks HTML that shouldn't be escaped.
Definition HtmlArmor.php:30
Class for exceptions thrown by ImageGalleryBase::factory().
Methods for dealing with language codes.
Base class for language-specific code.
Definition Language.php:66
formatNum( $number)
Normally we output all numbers in plain en_US style, that is 293,291.235 for two hundred ninety-three...
Store key-value entries in a size-limited in-memory LRU cache.
Base media handler class.
Helper class for mapping value objects representing basic entities to cache keys.
This class performs some operations related to tracking categories, such as adding a tracking categor...
A class for passing options to services.
assertRequiredOptions(array $expectedKeys)
Assert that the list of options provided in this instance exactly match $expectedKeys,...
Content object implementation for representing flat text.
Group all the pieces relevant to the context of a request into one instance.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
This class is a collection of static functions that serve two purposes:
Definition Html.php:56
Factory creating MWHttpRequest objects.
Variant of the Message class.
An interface for creating language converters.
isConversionDisabled()
Whether to disable language variant conversion.
A service that provides utilities to do with language names and codes.
Factory to create LinkRender objects.
Class that generates HTML for internal links.
Some internal bits split off from Skin.php.
Definition Linker.php:63
A class containing constants representing the names of configuration variables.
const EnableParserLimitReporting
Name constant for the EnableParserLimitReporting setting, for use with Config::get()
const MaxSigChars
Name constant for the MaxSigChars setting, for use with Config::get()
const ServerName
Name constant for the ServerName setting, for use with Config::get()
const AllowSlowParserFunctions
Name constant for the AllowSlowParserFunctions setting, for use with Config::get()
const AllowDisplayTitle
Name constant for the AllowDisplayTitle setting, for use with Config::get()
const StylePath
Name constant for the StylePath setting, for use with Config::get()
const MaxTocLevel
Name constant for the MaxTocLevel setting, for use with Config::get()
const Localtimezone
Name constant for the Localtimezone setting, for use with Config::get()
const Server
Name constant for the Server setting, for use with Config::get()
const FragmentMode
Name constant for the FragmentMode setting, for use with Config::get()
const EnableScaryTranscluding
Name constant for the EnableScaryTranscluding setting, for use with Config::get()
const TranscludeCacheExpiry
Name constant for the TranscludeCacheExpiry setting, for use with Config::get()
const Sitename
Name constant for the Sitename setting, for use with Config::get()
const ArticlePath
Name constant for the ArticlePath setting, for use with Config::get()
const ScriptPath
Name constant for the ScriptPath setting, for use with Config::get()
const ParserEnableLegacyMediaDOM
Name constant for the ParserEnableLegacyMediaDOM setting, for use with Config::get()
const SignatureValidation
Name constant for the SignatureValidation setting, for use with Config::get()
const MiserMode
Name constant for the MiserMode setting, for use with Config::get()
const RawHtml
Name constant for the RawHtml setting, for use with Config::get()
const PreprocessorCacheThreshold
Name constant for the PreprocessorCacheThreshold setting, for use with Config::get()
const ExtraInterlanguageLinkPrefixes
Name constant for the ExtraInterlanguageLinkPrefixes setting, for use with Config::get()
const ShowHostnames
Name constant for the ShowHostnames setting, for use with Config::get()
Service locator for MediaWiki core services.
The Message class deals with fetching and processing of interface message into a variety of formats.
Definition Message.php:158
This is one of the Core classes and should be read at least once by any new developers.
Class for handling an array of magic words.
Store information about magic words, and create/cache MagicWord objects.
ParserOutput is a rendering of a Content object or a message.
PHP Parser - Processes wiki markup (which uses a more user-friendly syntax, such as "[[link]]" for ma...
Definition Parser.php:155
guessLegacySectionNameFromWikiText( $text)
Same as guessSectionNameFromWikiText(), but produces legacy anchors instead, if possible.
Definition Parser.php:6254
$mExpensiveFunctionCount
Number of expensive parser function calls.
Definition Parser.php:299
getTargetLanguageConverter()
Shorthand for getting a Language Converter for Target language.
Definition Parser.php:1674
setOutputType( $ot)
Mutator for the output type.
Definition Parser.php:1100
getBadFileLookup()
Get the BadFileLookup instance that this Parser is using.
Definition Parser.php:1275
stripSectionName( $text)
Strips a text string of wikitext for use in a section anchor.
Definition Parser.php:6307
getDefaultSort()
Accessor for the 'defaultsort' page property.
Definition Parser.php:6200
limitationWarn( $limitationType, $current='', $max='')
Warn the user when a parser limitation is reached. Will warn the user at most once per limitation type...
Definition Parser.php:3012
makeImage(LinkTarget $link, $options, $holders=false)
Parse image options text and use it to make an image.
Definition Parser.php:5383
const OT_PLAIN
Output type: like Parser::extractSections() - portions of the original are returned unchanged.
Definition Parser.php:202
static guessSectionNameFromStrippedText( $text)
Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
Definition Parser.php:6267
static statelessFetchTemplate( $page, $parser=false)
Static function to get a template Can be overridden via ParserOptions::setTemplateCallback().
Definition Parser.php:3665
markerSkipCallback( $s, callable $callback)
Call a callback function on all regions of the given text that are not inside strip markers,...
Definition Parser.php:6345
getPreloadText( $text, PageReference $page, ParserOptions $options, $params=[])
Process the wikitext for the "?preload=" feature.
Definition Parser.php:998
getLinkRenderer()
Get a LinkRenderer instance to make links with.
Definition Parser.php:1240
parse( $text, PageReference $page, ParserOptions $options, $linestart=true, $clearState=true, $revid=null)
Convert wikitext to HTML. Do not call this function recursively.
Definition Parser.php:696
tagNeedsNowikiStrippedInTagPF(string $lowerTagName)
Definition Parser.php:3979
doBlockLevels( $text, $linestart)
Make lists from lines starting with ':', '*', '#', etc.
Definition Parser.php:2855
getMagicWordFactory()
Get the MagicWordFactory that this Parser is using.
Definition Parser.php:1255
lock()
Lock the current instance of the parser.
Definition Parser.php:6418
setFunctionHook( $id, callable $callback, $flags=0)
Create a function, e.g.
Definition Parser.php:5055
const EXT_LINK_URL_CLASS
Everything except bracket, space, or control characters.
Definition Parser.php:170
msg(string $msg,... $args)
Helper function to correctly set the target language and title of a message based on the parser conte...
Definition Parser.php:4197
preprocess( $text, ?PageReference $page, ParserOptions $options, $revid=null, $frame=false)
Expand templates and variables in the text, producing valid, static wikitext.
Definition Parser.php:951
firstCallInit()
Used to do various kinds of initialisation on the first call of the parser.
Definition Parser.php:615
guessSectionNameFromWikiText( $text)
Try to guess the section anchor name based on a wikitext fragment presumably extracted from a heading...
Definition Parser.php:6237
getUserSig(UserIdentity $user, $nickname=false, $fancySig=null)
Fetch the user's signature text, if any, and normalize to validated, ready-to-insert wikitext.
Definition Parser.php:4744
setTitle(Title $t=null)
Set the context title.
Definition Parser.php:1031
interwikiTransclude(LinkTarget $link, $action)
Transclude an interwiki link.
Definition Parser.php:3867
makeLimitReport()
Set the limit report data in the current ParserOutput.
Definition Parser.php:795
validateSig( $text)
Check that the user's signature contains no bad XML.
Definition Parser.php:4813
isCurrentRevisionOfTitleCached(LinkTarget $link)
Definition Parser.php:3584
getRevisionId()
Get the ID of the revision we are parsing.
Definition Parser.php:6054
renderImageGallery( $text, array $params)
Renders an image gallery from a text with one line per image.
Definition Parser.php:5139
argSubstitution(array $piece, PPFrame $frame)
Triple brace replacement – used for template arguments.
Definition Parser.php:3938
replaceSection( $oldText, $sectionId, $newText)
This function returns $oldtext after the content of the section specified by $section has been replac...
Definition Parser.php:5967
transformMsg( $text, ParserOptions $options, ?PageReference $page=null)
Wrapper for preprocess()
Definition Parser.php:4955
insertStripItem( $text)
Add an item to the strip state Returns the unique tag which must be inserted into the stripped text T...
Definition Parser.php:1382
internalParse( $text, $isMain=true, $frame=false)
Helper function for parse() that transforms wiki markup into half-parsed HTML.
Definition Parser.php:1605
static normalizeLinkUrl( $url)
Replace unusual escape codes in a URL with their equivalent characters.
Definition Parser.php:2339
replaceLinkHolders(&$text)
Replace "<!--LINK-->" link placeholders with actual links, in the buffer Placeholders created in Link...
Definition Parser.php:5100
static extractTagsAndParams(array $elements, $text, &$matches)
Replaces all occurrences of HTML-style comments and the given tags in the text with a random marker a...
Definition Parser.php:1298
static statelessFetchRevisionRecord(LinkTarget $link, $parser=null)
Wrapper around RevisionLookup::getKnownCurrentRevision.
Definition Parser.php:3600
getHookRunner()
Get a HookRunner for calling core hooks.
Definition Parser.php:1710
getContentLanguage()
Get the content language that this Parser is using.
Definition Parser.php:1265
getExternalLinkAttribs( $url)
Get an associative array of additional HTML attributes appropriate for a particular external link.
Definition Parser.php:2306
setPage(?PageReference $t=null)
Set the page used as context for parsing, e.g.
Definition Parser.php:1050
setOptions(ParserOptions $options)
Mutator for the ParserOptions object.
Definition Parser.php:1150
preSaveTransform( $text, PageReference $page, UserIdentity $user, ParserOptions $options, $clearState=true)
Transform wiki markup when saving a page by doing "\\r\\n" -> "\\n" conversion, substituting signatur...
Definition Parser.php:4623
killMarkers( $text)
Remove any strip markers found in the given text.
Definition Parser.php:6376
const OT_PREPROCESS
Output type: like Parser::preprocess()
Definition Parser.php:197
cleanSig( $text, $parsing=false)
Clean up signature text.
Definition Parser.php:4828
isLocked()
Will entry points such as parse() throw an exception due to the parser already being active?
Definition Parser.php:6443
getTemplateDom(LinkTarget $title)
Get the semi-parsed DOM representation of a template with a given title, and its redirect destination...
Definition Parser.php:3509
getRevisionUser()
Get the name of the user that edited the last revision.
Definition Parser.php:6147
getFlatSectionInfo( $text)
Get an array of preprocessor section information.
Definition Parser.php:6000
getTargetLanguage()
Get the target language for the content being parsed.
Definition Parser.php:1201
clearState()
Clear Parser state.
Definition Parser.php:627
getFunctionHooks()
Get all registered function hook identifiers.
Definition Parser.php:5089
enableOOUI()
Sets up the PHP implementation of OOUI for use in this request and instructs OutputPage to enable OO...
Definition Parser.php:6505
braceSubstitution(array $piece, PPFrame $frame)
Return the text of a template, after recursively replacing any variables or templates within the temp...
Definition Parser.php:3037
getUserIdentity()
Get a user either from the user set on Parser if it's set, or from the ParserOptions object otherwise...
Definition Parser.php:1220
static formatPageTitle( $nsText, $nsSeparator, $mainText)
Add HTML tags marking the parts of a page title, to be displayed in the first heading of the page.
Definition Parser.php:6476
setUser(?UserIdentity $user)
Set the current user.
Definition Parser.php:1020
Options( $x=null)
Accessor/mutator for the ParserOptions object.
Definition Parser.php:1161
getHookContainer()
Get a HookContainer capable of returning metadata about hooks or running extension hooks.
Definition Parser.php:1698
getOutputType()
Accessor for the output type.
Definition Parser.php:1091
recursivePreprocess( $text, $frame=false)
Recursive parser entry point that can be called from an extension tag hook.
Definition Parser.php:978
getRevisionSize()
Get the size of the revision.
Definition Parser.php:6171
getPreprocessor()
Get a preprocessor object.
Definition Parser.php:1230
getStripList()
Get a list of strippable XML-like elements.
Definition Parser.php:1361
extensionSubstitution(array $params, PPFrame $frame, bool $processNowiki=false)
Return the text to be used for a given extension tag.
Definition Parser.php:4003
setHook( $tag, callable $callback)
Create an HTML-style tag, e.g.
Definition Parser.php:4989
preprocessToDom( $text, $flags=0)
Get the document object model for the given wikitext.
Definition Parser.php:2935
getSection( $text, $sectionId, $defaultText='')
This function returns the text of a section, specified by a number ($section).
Definition Parser.php:5950
static parseWidthParam( $value, $parseHeight=true)
Parse a width param of an image link, like 300px or 200x300px.
Definition Parser.php:6390
const OT_WIKI
Output type: like Parser::preSaveTransform()
Definition Parser.php:195
fetchTemplateAndTitle(LinkTarget $link)
Fetch the unparsed text of a template and register a reference to it.
Definition Parser.php:3623
static getExternalLinkRel( $url=false, LinkTarget $title=null)
Get the rel attribute for a particular external link.
Definition Parser.php:2281
static stripOuterParagraph( $html)
Strip the outer &lt;p&gt;&lt;/p&gt; tag from HTML that consists of a single paragraph.
Definition Parser.php:6457
getRevisionRecordObject()
Get the revision record object for $this->mRevisionId.
Definition Parser.php:6064
parseExtensionTagAsTopLevelDoc( $text)
Needed by Parsoid/PHP to ensure all the hooks for extensions are run in the right order.
Definition Parser.php:932
OutputType( $x=null)
Accessor/mutator for the output type.
Definition Parser.php:1118
clearTagHooks()
Remove all tag hooks.
Definition Parser.php:5007
modifyImageHtml(File $file, array $params, string &$html)
Give hooks a chance to modify image thumbnail HTML.
Definition Parser.php:5685
static extractBody(string $text)
Strip everything but the <body> from the provided string.
Definition Parser.php:6492
getRevisionTimestamp()
Get the timestamp associated with the current revision, adjusted for the default server-local timesta...
Definition Parser.php:6119
__clone()
Allow extensions to clean up when the parser is cloned.
Definition Parser.php:585
static cleanSigInSig( $text)
Strip 3, 4 or 5 tildes out of signatures.
Definition Parser.php:4870
callParserFunction(PPFrame $frame, $function, array $args=[])
Call a parser function and return an array with text and flags.
Definition Parser.php:3419
__construct(ServiceOptions $svcOptions, MagicWordFactory $magicWordFactory, Language $contLang, ParserFactory $factory, UrlUtils $urlUtils, SpecialPageFactory $spFactory, LinkRendererFactory $linkRendererFactory, NamespaceInfo $nsInfo, LoggerInterface $logger, BadFileLookup $badFileLookup, LanguageConverterFactory $languageConverterFactory, LanguageNameUtils $languageNameUtils, HookContainer $hookContainer, TidyDriverBase $tidy, WANObjectCache $wanCache, UserOptionsLookup $userOptionsLookup, UserFactory $userFactory, TitleFormatter $titleFormatter, HttpRequestFactory $httpRequestFactory, TrackingCategories $trackingCategories, SignatureValidatorFactory $signatureValidatorFactory, UserNameUtils $userNameUtils)
Constructing parsers directly is not allowed! Use a ParserFactory.
Definition Parser.php:472
attributeStripCallback(&$text, $frame=false)
Callback from the Sanitizer for expanding items found in HTML attribute values, so they can be safely...
Definition Parser.php:5756
__destruct()
Reduce memory usage to reduce the impact of circular references.
Definition Parser.php:570
recursiveTagParse( $text, $frame=false)
Half-parse wikitext to half-parsed HTML.
Definition Parser.php:883
replaceVariables( $text, $frame=false, $argsOnly=false)
Replace magic variables, templates, and template arguments with the appropriate text.
Definition Parser.php:2960
doQuotes( $text)
Helper function for handleAllQuotes()
Definition Parser.php:2010
static replaceTableOfContentsMarker( $text, $toc)
Replace table of contents marker in parsed HTML.
Definition Parser.php:4891
const OT_HTML
Output type: like Parser::parse()
Definition Parser.php:193
recursiveTagParseFully( $text, $frame=false)
Fully parse wikitext to fully parsed HTML.
Definition Parser.php:907
fetchFileNoRegister(LinkTarget $link, array $options=[])
Helper function for fetchFileAndTitle.
Definition Parser.php:3844
getPage()
Returns the page used as context for parsing, e.g.
Definition Parser.php:1073
fetchFileAndTitle(LinkTarget $link, array $options=[])
Fetch a file and its title and register a reference to it.
Definition Parser.php:3817
fetchCurrentRevisionRecordOfTitle(LinkTarget $link)
Fetch the current revision of a given title as a RevisionRecord.
Definition Parser.php:3554
startExternalParse(?PageReference $page, ParserOptions $options, $outputType, $clearState=true, $revId=null)
Set up some variables which are usually set up in parse() so that an external function can call some ...
Definition Parser.php:4920
getFunctionLang()
Get a language object for use in parser functions such as {{FORMATNUM:}}.
Definition Parser.php:1188
resetOutput()
Reset the ParserOutput.
Definition Parser.php:673
static removeSomeTags(string $text, array $options=[])
Cleans up HTML, removes dangerous tags and attributes, and removes HTML comments; the result will alw...
WebRequest clone which takes values from a provided array.
Exception representing a failure to look up a revision.
Page revision base class.
Value object representing a content slot associated with a page revision.
Factory for handling the special page list and generating SpecialPage objects.
Parent class for all special pages.
Base class for HTML cleanup utilities.
MalformedTitleException is thrown when a TitleParser is unable to parse a title string.
A codec for MediaWiki page titles.
This is a utility class for dealing with namespaces that encodes all the "magic" behaviors of them ba...
Represents a title within MediaWiki.
Definition Title.php:79
Provides access to user options.
Creates User objects.
UserNameUtils service.
internal since 1.36
Definition User.php:93
Library for creating and parsing MW-style timestamps.
A service to expand, parse, and otherwise manipulate URLs.
Definition UrlUtils.php:16
validProtocols()
Returns a partial regular expression of recognized URL protocols, e.g.
Definition UrlUtils.php:354
Module of static functions for generating XML.
Definition Xml.php:37
static int $inParserFactory
Track calls to Parser constructor to aid in deprecation of direct Parser invocation.
Set options of the Parser.
getExpensiveParserFunctionLimit()
Maximum number of calls per parse to expensive parser functions.
getPreSaveTransform()
Transform wiki markup when saving the page?
getDisableTitleConversion()
Whether title conversion should be disabled.
Differences from DOM schema:
const DOM_FOR_INCLUSION
Transclusion mode flag for Preprocessor::preprocessToObj()
Arbitrary section name based PHP profiling.
A collection of static methods to play with strings.
static replaceMarkup( $search, $replace, $text)
More or less "markup-safe" str_replace() Ignores any instances of the separator inside <....
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
Multi-datacenter aware caching interface.
return[0=> 'ـ', 1=> ' ', 2=> '`', 3=> '´', 4=> '˜', 5=> '^', 6=> '¯', 7=> '‾', 8=> '˘', 9=> '˙', 10=> '¨', 11=> '˚', 12=> '˝', 13=> '᾽', 14=> '῝', 15=> '¸', 16=> '˛', 17=> '_', 18=> '‗', 19=> '῀', 20=> '﮲', 21=> '﮳', 22=> '﮴', 23=> '﮵', 24=> '﮶', 25=> '﮷', 26=> '﮸', 27=> '﮹', 28=> '﮺', 29=> '﮻', 30=> '﮼', 31=> '﮽', 32=> '﮾', 33=> '﮿', 34=> '﯀', 35=> '﯁', 36=> '゛', 37=> '゜', 38=> '-', 39=> '֊', 40=> '᐀', 41=> '᭠', 42=> '᠆', 43=> '᠇', 44=> '‐', 45=> '‒', 46=> '–', 47=> '—', 48=> '―', 49=> '⁓', 50=> '⸗', 51=> '゠', 52=> '・', 53=> ',', 54=> '՝', 55=> '،', 56=> '؍', 57=> '٫', 58=> '٬', 59=> '߸', 60=> '᠂', 61=> '᠈', 62=> '꓾', 63=> '꘍', 64=> '꛵', 65=> '︑', 66=> ';', 67=> '؛', 68=> '⁏', 69=> '꛶', 70=> ':', 71=> '։', 72=> '؞', 73=> '܃', 74=> '܄', 75=> '܅', 76=> '܆', 77=> '܇', 78=> '܈', 79=> '࠰', 80=> '࠱', 81=> '࠲', 82=> '࠳', 83=> '࠴', 84=> '࠵', 85=> '࠶', 86=> '࠷', 87=> '࠸', 88=> '࠹', 89=> '࠺', 90=> '࠻', 91=> '࠼', 92=> '࠽', 93=> '࠾', 94=> '፡', 95=> '፣', 96=> '፤', 97=> '፥', 98=> '፦', 99=> '᠄', 100=> '᠅', 101=> '༔', 102=> '៖', 103=> '᭝', 104=> '꧇', 105=> '᛫', 106=> '᛬', 107=> '᛭', 108=> '꛴', 109=> '!', 110=> '¡', 111=> '՜', 112=> '߹', 113=> '᥄', 114=> '?', 115=> '¿', 116=> '⸮', 117=> '՞', 118=> '؟', 119=> '܉', 120=> '፧', 121=> '᥅', 122=> '⳺', 123=> '⳻', 124=> '꘏', 125=> '꛷', 126=> '‽', 127=> '⸘', 128=> '.', 129=> '᠁', 130=> '۔', 131=> '܁', 132=> '܂', 133=> '።', 134=> '᠃', 135=> '᠉', 136=> '᙮', 137=> '᭜', 138=> '⳹', 139=> '⳾', 140=> '⸰', 141=> '꓿', 142=> '꘎', 143=> '꛳', 144=> '︒', 145=> '·', 146=> '⸱', 147=> '।', 148=> '॥', 149=> '꣎', 150=> '꣏', 151=> '᰻', 152=> '᰼', 153=> '꡶', 154=> '꡷', 155=> '᜵', 156=> '᜶', 157=> '꤯', 158=> '၊', 159=> '။', 160=> '។', 161=> '៕', 162=> '᪨', 163=> '᪩', 164=> '᪪', 165=> '᪫', 166=> '᭞', 167=> '᭟', 168=> '꧈', 169=> '꧉', 170=> '꩝', 171=> '꩞', 172=> '꩟', 173=> '꯫', 174=> '𐩖', 175=> '𐩗', 176=> '𑁇', 177=> '𑁈', 178=> '𑃀', 179=> '𑃁', 180=> '᱾', 181=> '᱿', 182=> '܀', 183=> '߷', 184=> '჻', 185=> '፠', 186=> '፨', 187=> '᨞', 188=> '᨟', 189=> '᭚', 190=> '᭛', 
191=> '꧁', 192=> '꧂', 193=> '꧃', 194=> '꧄', 195=> '꧅', 196=> '꧆', 197=> '꧊', 198=> '꧋', 199=> '꧌', 200=> '꧍', 201=> '꛲', 202=> '꥟', 203=> '𐡗', 204=> '𐬺', 205=> '𐬻', 206=> '𐬼', 207=> '𐬽', 208=> '𐬾', 209=> '𐬿', 210=> '𑂾', 211=> '𑂿', 212=> '⁕', 213=> '⁖', 214=> '⁘', 215=> '⁙', 216=> '⁚', 217=> '⁛', 218=> '⁜', 219=> '⁝', 220=> '⁞', 221=> '⸪', 222=> '⸫', 223=> '⸬', 224=> '⸭', 225=> '⳼', 226=> '⳿', 227=> '⸙', 228=> '𐤿', 229=> '𐄀', 230=> '𐄁', 231=> '𐄂', 232=> '𐎟', 233=> '𐏐', 234=> '𐤟', 235=> '𒑰', 236=> '𒑱', 237=> '𒑲', 238=> '𒑳', 239=> '\'', 240=> '‘', 241=> '’', 242=> '‚', 243=> '‛', 244=> '‹', 245=> '›', 246=> '"', 247 => '“', 248 => '”', 249 => '„', 250 => '‟', 251 => '«', 252 => '»', 253 => '(', 254 => ')', 255 => '[', 256 => ']', 257 => '{', 258 => '}', 259 => '༺', 260 => '༻', 261 => '༼', 262 => '༽', 263 => '᚛', 264 => '᚜', 265 => '⁅', 266 => '⁆', 267 => '⧼', 268 => '⧽', 269 => '⦃', 270 => '⦄', 271 => '⦅', 272 => '⦆', 273 => '⦇', 274 => '⦈', 275 => '⦉', 276 => '⦊', 277 => '⦋', 278 => '⦌', 279 => '⦍', 280 => '⦎', 281 => '⦏', 282 => '⦐', 283 => '⦑', 284 => '⦒', 285 => '⦓', 286 => '⦔', 287 => '⦕', 288 => '⦖', 289 => '⦗', 290 => '⦘', 291 => '⟬', 292 => '⟭', 293 => '⟮', 294 => '⟯', 295 => '⸂', 296 => '⸃', 297 => '⸄', 298 => '⸅', 299 => '⸉', 300 => '⸊', 301 => '⸌', 302 => '⸍', 303 => '⸜', 304 => '⸝', 305 => '⸠', 306 => '⸡', 307 => '⸢', 308 => '⸣', 309 => '⸤', 310 => '⸥', 311 => '⸦', 312 => '⸧', 313 => '⸨', 314 => '⸩', 315 => '〈', 316 => '〉', 317 => '「', 318 => '」', 319 => '﹝', 320 => '﹞', 321 => '︗', 322 => '︘', 323 => '﴾', 324 => '﴿', 325 => '§', 326 => '¶', 327 => '⁋', 328 => '©', 329 => '®', 330 => '@', 331 => '*', 332 => '⁎', 333 => '⁑', 334 => '٭', 335 => '꙳', 336 => '/', 337 => '⁄', 338 => '\\', 339 => '&', 340 => '⅋', 341 => '⁊', 342 => '#', 343 => '%', 344 => '٪', 345 => '‰', 346 => '؉', 347 => '‱', 348 => '؊', 349 => '⁒', 350 => '†', 351 => '‡', 352 => '•', 353 => '‣', 354 => '‧', 355 => '⁃', 356 => '⁌', 357 => '⁍', 358 => '′', 359 => '‵', 360 => '‸', 361 => '※', 
362 => '‿', 363 => '⁔', 364 => '⁀', 365 => '⁐', 366 => '⁁', 367 => '⁂', 368 => '⸀', 369 => '⸁', 370 => '⸆', 371 => '⸇', 372 => '⸈', 373 => '⸋', 374 => '⸎', 375 => '⸏', 376 => '⸐', 377 => '⸑', 378 => '⸒', 379 => '⸓', 380 => '⸔', 381 => '⸕', 382 => '⸖', 383 => '⸚', 384 => '⸛', 385 => '⸞', 386 => '⸟', 387 => '꙾', 388 => '՚', 389 => '՛', 390 => '՟', 391 => '־', 392 => '׀', 393 => '׃', 394 => '׆', 395 => '׳', 396 => '״', 397 => '܊', 398 => '܋', 399 => '܌', 400 => '܍', 401 => '࡞', 402 => '᠀', 403 => '॰', 404 => '꣸', 405 => '꣹', 406 => '꣺', 407 => '෴', 408 => '๚', 409 => '๛', 410 => '꫞', 411 => '꫟', 412 => '༄', 413 => '༅', 414 => '༆', 415 => '༇', 416 => '༈', 417 => '༉', 418 => '༊', 419 => '࿐', 420 => '࿑', 421 => '་', 422 => '།', 423 => '༎', 424 => '༏', 425 => '༐', 426 => '༑', 427 => '༒', 428 => '྅', 429 => '࿒', 430 => '࿓', 431 => '࿔', 432 => '࿙', 433 => '࿚', 434 => '᰽', 435 => '᰾', 436 => '᰿', 437 => '᥀', 438 => '၌', 439 => '၍', 440 => '၎', 441 => '၏', 442 => '႞', 443 => '႟', 444 => '꩷', 445 => '꩸', 446 => '꩹', 447 => 'ៗ', 448 => '៘', 449 => '៙', 450 => '៚', 451 => '᪠', 452 => '᪡', 453 => '᪢', 454 => '᪣', 455 => '᪤', 456 => '᪥', 457 => '᪦', 458 => '᪬', 459 => '᪭', 460 => '᙭', 461 => '⵰', 462 => '꡴', 463 => '꡵', 464 => '᯼', 465 => '᯽', 466 => '᯾', 467 => '᯿', 468 => '꤮', 469 => '꧞', 470 => '꧟', 471 => '꩜', 472 => '𑁉', 473 => '𑁊', 474 => '𑁋', 475 => '𑁌', 476 => '𑁍', 477 => '𐩐', 478 => '𐩑', 479 => '𐩒', 480 => '𐩓', 481 => '𐩔', 482 => '𐩕', 483 => '𐩘', 484 => '𐬹', 485 => '𑂻', 486 => '𑂼', 487 => 'ʹ', 488 => '͵', 489 => 'ʺ', 490 => '˂', 491 => '˃', 492 => '˄', 493 => '˅', 494 => 'ˆ', 495 => 'ˇ', 496 => 'ˈ', 497 => 'ˉ', 498 => 'ˊ', 499 => 'ˋ', 500 => 'ˌ', 501 => 'ˍ', 502 => 'ˎ', 503 => 'ˏ', 504 => '˒', 505 => '˓', 506 => '˔', 507 => '˕', 508 => '˖', 509 => '˗', 510 => '˞', 511 => '˟', 512 => '˥', 513 => '˦', 514 => '˧', 515 => '˨', 516 => '˩', 517 => '˪', 518 => '˫', 519 => 'ˬ', 520 => '˭', 521 => '˯', 522 => '˰', 523 => '˱', 524 => '˲', 525 => '˳', 526 => '˴', 527 => '˵', 528 => 
'˶', 529 => '˷', 530 => '˸', 531 => '˹', 532 => '˺', 533 => '˻', 534 => '˼', 535 => '˽', 536 => '˾', 537 => '˿', 538 => '᎐', 539 => '᎑', 540 => '᎒', 541 => '᎓', 542 => '᎔', 543 => '᎕', 544 => '᎖', 545 => '᎗', 546 => '᎘', 547 => '᎙', 548 => '꜀', 549 => '꜁', 550 => '꜂', 551 => '꜃', 552 => '꜄', 553 => '꜅', 554 => '꜆', 555 => '꜇', 556 => '꜈', 557 => '꜉', 558 => '꜊', 559 => '꜋', 560 => '꜌', 561 => '꜍', 562 => '꜎', 563 => '꜏', 564 => '꜐', 565 => '꜑', 566 => '꜒', 567 => '꜓', 568 => '꜔', 569 => '꜕', 570 => '꜖', 571 => 'ꜗ', 572 => 'ꜘ', 573 => 'ꜙ', 574 => 'ꜚ', 575 => 'ꜛ', 576 => 'ꜜ', 577 => 'ꜝ', 578 => 'ꜞ', 579 => 'ꜟ', 580 => '꜠', 581 => '꜡', 582 => 'ꞈ', 583 => '꞉', 584 => '꞊', 585 => '°', 586 => '҂', 587 => '؈', 588 => '؎', 589 => '؏', 590 => '۞', 591 => '۩', 592 => '﷽', 593 => '߶', 594 => '৺', 595 => '୰', 596 => '௳', 597 => '௴', 598 => '௵', 599 => '௶', 600 => '௷', 601 => '௸', 602 => '௺', 603 => '౿', 604 => '൹', 605 => '꠨', 606 => '꠩', 607 => '꠪', 608 => '꠫', 609 => '꠶', 610 => '꠷', 611 => '꠹', 612 => '๏', 613 => '༁', 614 => '༂', 615 => '༃', 616 => '༓', 617 => '༕', 618 => '༖', 619 => '༗', 620 => '༚', 621 => '༛', 622 => '༜', 623 => '༝', 624 => '༞', 625 => '༟', 626 => '༴', 627 => '༶', 628 => '༸', 629 => '྾', 630 => '྿', 631 => '࿀', 632 => '࿁', 633 => '࿂', 634 => '࿃', 635 => '࿄', 636 => '࿅', 637 => '࿇', 638 => '࿈', 639 => '࿉', 640 => '࿊', 641 => '࿋', 642 => '࿌', 643 => '࿎', 644 => '࿏', 645 => '࿕', 646 => '࿖', 647 => '࿗', 648 => '࿘', 649 => '᧠', 650 => '᧡', 651 => '᧢', 652 => '᧣', 653 => '᧤', 654 => '᧥', 655 => '᧦', 656 => '᧧', 657 => '᧨', 658 => '᧩', 659 => '᧪', 660 => '᧫', 661 => '᧬', 662 => '᧭', 663 => '᧮', 664 => '᧯', 665 => '᧰', 666 => '᧱', 667 => '᧲', 668 => '᧳', 669 => '᧴', 670 => '᧵', 671 => '᧶', 672 => '᧷', 673 => '᧸', 674 => '᧹', 675 => '᧺', 676 => '᧻', 677 => '᧼', 678 => '᧽', 679 => '᧾', 680 => '᧿', 681 => '᭡', 682 => '᭢', 683 => '᭣', 684 => '᭤', 685 => '᭥', 686 => '᭦', 687 => '᭧', 688 => '᭨', 689 => '᭩', 690 => '᭪', 691 => '᭴', 692 => '᭵', 693 => '᭶', 694 => '᭷', 
695 => '᭸', 696 => '᭹', 697 => '᭺', 698 => '᭻', 699 => '᭼', 700 => '℄', 701 => '℈', 702 => '℔', 703 => '℗', 704 => '℘', 705 => '℞', 706 => '℟', 707 => '℣', 708 => '℥', 709 => '℧', 710 => '℩', 711 => '℮', 712 => '℺', 713 => '⅁', 714 => '⅂', 715 => '⅃', 716 => '⅄', 717 => '⅊', 718 => '⅌', 719 => '⅍', 720 => '⅏', 721 => '←', 722 => '→', 723 => '↑', 724 => '↓', 725 => '↔', 726 => '↕', 727 => '↖', 728 => '↗', 729 => '↘', 730 => '↙', 731 => '↜', 732 => '↝', 733 => '↞', 734 => '↟', 735 => '↠', 736 => '↡', 737 => '↢', 738 => '↣', 739 => '↤', 740 => '↥', 741 => '↦', 742 => '↧', 743 => '↨', 744 => '↩', 745 => '↪', 746 => '↫', 747 => '↬', 748 => '↭', 749 => '↯', 750 => '↰', 751 => '↱', 752 => '↲', 753 => '↳', 754 => '↴', 755 => '↵', 756 => '↶', 757 => '↷', 758 => '↸', 759 => '↹', 760 => '↺', 761 => '↻', 762 => '↼', 763 => '↽', 764 => '↾', 765 => '↿', 766 => '⇀', 767 => '⇁', 768 => '⇂', 769 => '⇃', 770 => '⇄', 771 => '⇅', 772 => '⇆', 773 => '⇇', 774 => '⇈', 775 => '⇉', 776 => '⇊', 777 => '⇋', 778 => '⇌', 779 => '⇐', 780 => '⇑', 781 => '⇒', 782 => '⇓', 783 => '⇔', 784 => '⇕', 785 => '⇖', 786 => '⇗', 787 => '⇘', 788 => '⇙', 789 => '⇚', 790 => '⇛', 791 => '⇜', 792 => '⇝', 793 => '⇞', 794 => '⇟', 795 => '⇠', 796 => '⇡', 797 => '⇢', 798 => '⇣', 799 => '⇤', 800 => '⇥', 801 => '⇦', 802 => '⇧', 803 => '⇨', 804 => '⇩', 805 => '⇪', 806 => '⇫', 807 => '⇬', 808 => '⇭', 809 => '⇮', 810 => '⇯', 811 => '⇰', 812 => '⇱', 813 => '⇲', 814 => '⇳', 815 => '⇴', 816 => '⇵', 817 => '⇶', 818 => '⇷', 819 => '⇸', 820 => '⇹', 821 => '⇺', 822 => '⇻', 823 => '⇼', 824 => '⇽', 825 => '⇾', 826 => '⇿', 827 => '∀', 828 => '∁', 829 => '∂', 830 => '∃', 831 => '∅', 832 => '∆', 833 => '∇', 834 => '∈', 835 => '∊', 836 => '∋', 837 => '∍', 838 => '϶', 839 => '∎', 840 => '∏', 841 => '∐', 842 => '∑', 843 => '+', 844 => '±', 845 => '÷', 846 => '×', 847 => '<', 848 => '=', 849 => '>', 850 => '¬', 851 => '|', 852 => '¦', 853 => '‖', 854 => '~', 855 => '−', 856 => '∓', 857 => '∔', 858 => '∕', 859 => '∖', 860 => '∗', 861 => 
'∘', 862 => '∙', 863 => '√', 864 => '∛', 865 => '؆', 866 => '∜', 867 => '؇', 868 => '∝', 869 => '∞', 870 => '∟', 871 => '∠', 872 => '∡', 873 => '∢', 874 => '∣', 875 => '∥', 876 => '∧', 877 => '∨', 878 => '∩', 879 => '∪', 880 => '∫', 881 => '∮', 882 => '∱', 883 => '∲', 884 => '∳', 885 => '∴', 886 => '∵', 887 => '∶', 888 => '∷', 889 => '∸', 890 => '∹', 891 => '∺', 892 => '∻', 893 => '∼', 894 => '∽', 895 => '∾', 896 => '∿', 897 => '≀', 898 => '≂', 899 => '≃', 900 => '≅', 901 => '≆', 902 => '≈', 903 => '≊', 904 => '≋', 905 => '≌', 906 => '≍', 907 => '≎', 908 => '≏', 909 => '≐', 910 => '≑', 911 => '≒', 912 => '≓', 913 => '≔', 914 => '≕', 915 => '≖', 916 => '≗', 917 => '≘', 918 => '≙', 919 => '≚', 920 => '≛', 921 => '≜', 922 => '≝', 923 => '≞', 924 => '≟', 925 => '≡', 926 => '≣', 927 => '≤', 928 => '≥', 929 => '≦', 930 => '≧', 931 => '≨', 932 => '≩', 933 => '≪', 934 => '≫', 935 => '≬', 936 => '≲', 937 => '≳', 938 => '≶', 939 => '≷', 940 => '≺', 941 => '≻', 942 => '≼', 943 => '≽', 944 => '≾', 945 => '≿', 946 => '⊂', 947 => '⊃', 948 => '⊆', 949 => '⊇', 950 => '⊊', 951 => '⊋', 952 => '⊌', 953 => '⊍', 954 => '⊎', 955 => '⊏', 956 => '⊐', 957 => '⊑', 958 => '⊒', 959 => '⊓', 960 => '⊔', 961 => '⊕', 962 => '⊖', 963 => '⊗', 964 => '⊘', 965 => '⊙', 966 => '⊚', 967 => '⊛', 968 => '⊜', 969 => '⊝', 970 => '⊞', 971 => '⊟', 972 => '⊠', 973 => '⊡', 974 => '⊢', 975 => '⊣', 976 => '⊤', 977 => '⊥', 978 => '⊦', 979 => '⊧', 980 => '⊨', 981 => '⊩', 982 => '⊪', 983 => '⊫', 984 => '⊰', 985 => '⊱', 986 => '⊲', 987 => '⊳', 988 => '⊴', 989 => '⊵', 990 => '⊶', 991 => '⊷', 992 => '⊸', 993 => '⊹', 994 => '⊺', 995 => '⊻', 996 => '⊼', 997 => '⊽', 998 => '⊾', 999 => '⊿', 1000 => '⋀', 1001 => '⋁', 1002 => '⋂', 1003 => '⋃', 1004 => '⋄', 1005 => '⋅', 1006 => '⋆', 1007 => '⋇', 1008 => '⋈', 1009 => '⋉', 1010 => '⋊', 1011 => '⋋', 1012 => '⋌', 1013 => '⋍', 1014 => '⋎', 1015 => '⋏', 1016 => '⋐', 1017 => '⋑', 1018 => '⋒', 1019 => '⋓', 1020 => '⋔', 1021 => '⋕', 1022 => '⋖', 1023 => '⋗', 1024 => '⋘', 1025 => '⋙', 
1026 => '⋚', 1027 => '⋛', 1028 => '⋜', 1029 => '⋝', 1030 => '⋞', 1031 => '⋟', 1032 => '⋤', 1033 => '⋥', 1034 => '⋦', 1035 => '⋧', 1036 => '⋨', 1037 => '⋩', 1038 => '⋮', 1039 => '⋯', 1040 => '⋰', 1041 => '⋱', 1042 => '⋲', 1043 => '⋳', 1044 => '⋴', 1045 => '⋵', 1046 => '⋶', 1047 => '⋷', 1048 => '⋸', 1049 => '⋹', 1050 => '⋺', 1051 => '⋻', 1052 => '⋼', 1053 => '⋽', 1054 => '⋾', 1055 => '⋿', 1056 => '⌀', 1057 => '⌁', 1058 => '⌂', 1059 => '⌃', 1060 => '⌄', 1061 => '⌅', 1062 => '⌆', 1063 => '⌇', 1064 => '⌈', 1065 => '⌉', 1066 => '⌊', 1067 => '⌋', 1068 => '⌌', 1069 => '⌍', 1070 => '⌎', 1071 => '⌏', 1072 => '⌐', 1073 => '⌑', 1074 => '⌒', 1075 => '⌓', 1076 => '⌔', 1077 => '⌕', 1078 => '⌖', 1079 => '⌗', 1080 => '⌘', 1081 => '⌙', 1082 => '⌚', 1083 => '⌛', 1084 => '⌜', 1085 => '⌝', 1086 => '⌞', 1087 => '⌟', 1088 => '⌠', 1089 => '⌡', 1090 => '⌢', 1091 => '⌣', 1092 => '⌤', 1093 => '⌥', 1094 => '⌦', 1095 => '⌧', 1096 => '⌨', 1097 => '⌫', 1098 => '⌬', 1099 => '⌭', 1100 => '⌮', 1101 => '⌯', 1102 => '⌰', 1103 => '⌱', 1104 => '⌲', 1105 => '⌳', 1106 => '⌴', 1107 => '⌵', 1108 => '⌶', 1109 => '⌷', 1110 => '⌸', 1111 => '⌹', 1112 => '⌺', 1113 => '⌻', 1114 => '⌼', 1115 => '⌽', 1116 => '⌾', 1117 => '⌿', 1118 => '⍀', 1119 => '⍁', 1120 => '⍂', 1121 => '⍃', 1122 => '⍄', 1123 => '⍅', 1124 => '⍆', 1125 => '⍇', 1126 => '⍈', 1127 => '⍉', 1128 => '⍊', 1129 => '⍋', 1130 => '⍌', 1131 => '⍍', 1132 => '⍎', 1133 => '⍏', 1134 => '⍐', 1135 => '⍑', 1136 => '⍒', 1137 => '⍓', 1138 => '⍔', 1139 => '⍕', 1140 => '⍖', 1141 => '⍗', 1142 => '⍘', 1143 => '⍙', 1144 => '⍚', 1145 => '⍛', 1146 => '⍜', 1147 => '⍝', 1148 => '⍞', 1149 => '⍟', 1150 => '⍠', 1151 => '⍡', 1152 => '⍢', 1153 => '⍣', 1154 => '⍤', 1155 => '⍥', 1156 => '⍦', 1157 => '⍧', 1158 => '⍨', 1159 => '⍩', 1160 => '⍪', 1161 => '⍫', 1162 => '⍬', 1163 => '⍭', 1164 => '⍮', 1165 => '⍯', 1166 => '⍰', 1167 => '⍱', 1168 => '⍲', 1169 => '⍳', 1170 => '⍴', 1171 => '⍵', 1172 => '⍶', 1173 => '⍷', 1174 => '⍸', 1175 => '⍹', 1176 => '⍺', 1177 => '⍻', 1178 => '⍼', 1179 => 
'⍽', 1180 => '⍾', 1181 => '⍿', 1182 => '⎀', 1183 => '⎁', 1184 => '⎂', 1185 => '⎃', 1186 => '⎄', 1187 => '⎅', 1188 => '⎆', 1189 => '⎇', 1190 => '⎈', 1191 => '⎉', 1192 => '⎊', 1193 => '⎋', 1194 => '⎌', 1195 => '⎍', 1196 => '⎎', 1197 => '⎏', 1198 => '⎐', 1199 => '⎑', 1200 => '⎒', 1201 => '⎓', 1202 => '⎔', 1203 => '⎕', 1204 => '⎖', 1205 => '⎗', 1206 => '⎘', 1207 => '⎙', 1208 => '⎚', 1209 => '⎛', 1210 => '⎜', 1211 => '⎝', 1212 => '⎞', 1213 => '⎟', 1214 => '⎠', 1215 => '⎡', 1216 => '⎢', 1217 => '⎣', 1218 => '⎤', 1219 => '⎥', 1220 => '⎦', 1221 => '⎧', 1222 => '⎨', 1223 => '⎩', 1224 => '⎪', 1225 => '⎫', 1226 => '⎬', 1227 => '⎭', 1228 => '⎮', 1229 => '⎯', 1230 => '⎰', 1231 => '⎱', 1232 => '⎲', 1233 => '⎳', 1234 => '⎴', 1235 => '⎵', 1236 => '⎶', 1237 => '⎷', 1238 => '⎸', 1239 => '⎹', 1240 => '⎺', 1241 => '⎻', 1242 => '⎼', 1243 => '⎽', 1244 => '⎾', 1245 => '⎿', 1246 => '⏀', 1247 => '⏁', 1248 => '⏂', 1249 => '⏃', 1250 => '⏄', 1251 => '⏅', 1252 => '⏆', 1253 => '⏇', 1254 => '⏈', 1255 => '⏉', 1256 => '⏊', 1257 => '⏋', 1258 => '⏌', 1259 => '⏍', 1260 => '⏎', 1261 => '⏏', 1262 => '⏐', 1263 => '⏑', 1264 => '⏒', 1265 => '⏓', 1266 => '⏔', 1267 => '⏕', 1268 => '⏖', 1269 => '⏗', 1270 => '⏘', 1271 => '⏙', 1272 => '⏚', 1273 => '⏛', 1274 => '⏜', 1275 => '⏝', 1276 => '⏞', 1277 => '⏟', 1278 => '⏠', 1279 => '⏡', 1280 => '⏢', 1281 => '⏣', 1282 => '⏤', 1283 => '⏥', 1284 => '⏦', 1285 => '⏧', 1286 => '⏨', 1287 => '⏩', 1288 => '⏪', 1289 => '⏫', 1290 => '⏬', 1291 => '⏭', 1292 => '⏮', 1293 => '⏯', 1294 => '⏰', 1295 => '⏱', 1296 => '⏲', 1297 => '⏳', 1298 => '␀', 1299 => '␁', 1300 => '␂', 1301 => '␃', 1302 => '␄', 1303 => '␅', 1304 => '␆', 1305 => '␇', 1306 => '␈', 1307 => '␉', 1308 => '␊', 1309 => '␋', 1310 => '␌', 1311 => '␍', 1312 => '␎', 1313 => '␏', 1314 => '␐', 1315 => '␑', 1316 => '␒', 1317 => '␓', 1318 => '␔', 1319 => '␕', 1320 => '␖', 1321 => '␗', 1322 => '␘', 1323 => '␙', 1324 => '␚', 1325 => '␛', 1326 => '␜', 1327 => '␝', 1328 => '␞', 1329 => '␟', 1330 => '␠', 1331 => '␡', 1332 => '␢', 1333 
=> '␣', 1334 => '␤', 1335 => '␥', 1336 => '␦', 1337 => '⑀', 1338 => '⑁', 1339 => '⑂', 1340 => '⑃', 1341 => '⑄', 1342 => '⑅', 1343 => '⑆', 1344 => '⑇', 1345 => '⑈', 1346 => '⑉', 1347 => '⑊', 1348 => '─', 1349 => '━', 1350 => '│', 1351 => '┃', 1352 => '┄', 1353 => '┅', 1354 => '┆', 1355 => '┇', 1356 => '┈', 1357 => '┉', 1358 => '┊', 1359 => '┋', 1360 => '┌', 1361 => '┍', 1362 => '┎', 1363 => '┏', 1364 => '┐', 1365 => '┑', 1366 => '┒', 1367 => '┓', 1368 => '└', 1369 => '┕', 1370 => '┖', 1371 => '┗', 1372 => '┘', 1373 => '┙', 1374 => '┚', 1375 => '┛', 1376 => '├', 1377 => '┝', 1378 => '┞', 1379 => '┟', 1380 => '┠', 1381 => '┡', 1382 => '┢', 1383 => '┣', 1384 => '┤', 1385 => '┥', 1386 => '┦', 1387 => '┧', 1388 => '┨', 1389 => '┩', 1390 => '┪', 1391 => '┫', 1392 => '┬', 1393 => '┭', 1394 => '┮', 1395 => '┯', 1396 => '┰', 1397 => '┱', 1398 => '┲', 1399 => '┳', 1400 => '┴', 1401 => '┵', 1402 => '┶', 1403 => '┷', 1404 => '┸', 1405 => '┹', 1406 => '┺', 1407 => '┻', 1408 => '┼', 1409 => '┽', 1410 => '┾', 1411 => '┿', 1412 => '╀', 1413 => '╁', 1414 => '╂', 1415 => '╃', 1416 => '╄', 1417 => '╅', 1418 => '╆', 1419 => '╇', 1420 => '╈', 1421 => '╉', 1422 => '╊', 1423 => '╋', 1424 => '╌', 1425 => '╍', 1426 => '╎', 1427 => '╏', 1428 => '═', 1429 => '║', 1430 => '╒', 1431 => '╓', 1432 => '╔', 1433 => '╕', 1434 => '╖', 1435 => '╗', 1436 => '╘', 1437 => '╙', 1438 => '╚', 1439 => '╛', 1440 => '╜', 1441 => '╝', 1442 => '╞', 1443 => '╟', 1444 => '╠', 1445 => '╡', 1446 => '╢', 1447 => '╣', 1448 => '╤', 1449 => '╥', 1450 => '╦', 1451 => '╧', 1452 => '╨', 1453 => '╩', 1454 => '╪', 1455 => '╫', 1456 => '╬', 1457 => '╭', 1458 => '╮', 1459 => '╯', 1460 => '╰', 1461 => '╱', 1462 => '╲', 1463 => '╳', 1464 => '╴', 1465 => '╵', 1466 => '╶', 1467 => '╷', 1468 => '╸', 1469 => '╹', 1470 => '╺', 1471 => '╻', 1472 => '╼', 1473 => '╽', 1474 => '╾', 1475 => '╿', 1476 => '▀', 1477 => '▁', 1478 => '▂', 1479 => '▃', 1480 => '▄', 1481 => '▅', 1482 => '▆', 1483 => '▇', 1484 => '█', 1485 => '▉', 1486 => '▊', 
1487 => '▋', 1488 => '▌', 1489 => '▍', 1490 => '▎', 1491 => '▏', 1492 => '▐', 1493 => '░', 1494 => '▒', 1495 => '▓', 1496 => '▔', 1497 => '▕', 1498 => '▖', 1499 => '▗', 1500 => '▘', 1501 => '▙', 1502 => '▚', 1503 => '▛', 1504 => '▜', 1505 => '▝', 1506 => '▞', 1507 => '▟', 1508 => '■', 1509 => '□', 1510 => '▢', 1511 => '▣', 1512 => '▤', 1513 => '▥', 1514 => '▦', 1515 => '▧', 1516 => '▨', 1517 => '▩', 1518 => '▪', 1519 => '▫', 1520 => '▬', 1521 => '▭', 1522 => '▮', 1523 => '▯', 1524 => '▰', 1525 => '▱', 1526 => '▲', 1527 => '△', 1528 => '▴', 1529 => '▵', 1530 => '▶', 1531 => '▷', 1532 => '▸', 1533 => '▹', 1534 => '►', 1535 => '▻', 1536 => '▼', 1537 => '▽', 1538 => '▾', 1539 => '▿', 1540 => '◀', 1541 => '◁', 1542 => '◂', 1543 => '◃', 1544 => '◄', 1545 => '◅', 1546 => '◆', 1547 => '◇', 1548 => '◈', 1549 => '◉', 1550 => '◊', 1551 => '○', 1552 => '◌', 1553 => '◍', 1554 => '◎', 1555 => '●', 1556 => '◐', 1557 => '◑', 1558 => '◒', 1559 => '◓', 1560 => '◔', 1561 => '◕', 1562 => '◖', 1563 => '◗', 1564 => '◘', 1565 => '◙', 1566 => '◚', 1567 => '◛', 1568 => '◜', 1569 => '◝', 1570 => '◞', 1571 => '◟', 1572 => '◠', 1573 => '◡', 1574 => '◢', 1575 => '◣', 1576 => '◤', 1577 => '◥', 1578 => '◦', 1579 => '◧', 1580 => '◨', 1581 => '◩', 1582 => '◪', 1583 => '◫', 1584 => '◬', 1585 => '◭', 1586 => '◮', 1587 => '◯', 1588 => '◰', 1589 => '◱', 1590 => '◲', 1591 => '◳', 1592 => '◴', 1593 => '◵', 1594 => '◶', 1595 => '◷', 1596 => '◸', 1597 => '◹', 1598 => '◺', 1599 => '◻', 1600 => '◼', 1601 => '◽', 1602 => '◾', 1603 => '◿', 1604 => '☀', 1605 => '☁', 1606 => '☂', 1607 => '☃', 1608 => '☄', 1609 => '★', 1610 => '☆', 1611 => '☇', 1612 => '☈', 1613 => '☉', 1614 => '☊', 1615 => '☋', 1616 => '☌', 1617 => '☍', 1618 => '☎', 1619 => '☏', 1620 => '☐', 1621 => '☑', 1622 => '☒', 1623 => '☓', 1624 => '☔', 1625 => '☕', 1626 => '☖', 1627 => '☗', 1628 => '☘', 1629 => '☙', 1630 => '☚', 1631 => '☛', 1632 => '☜', 1633 => '☝', 1634 => '☞', 1635 => '☟', 1636 => '☠', 1637 => '☡', 1638 => '☢', 1639 => '☣', 1640 => 
'☤', 1641 => '☥', 1642 => '☦', 1643 => '☧', 1644 => '☨', 1645 => '☩', 1646 => '☪', 1647 => '☫', 1648 => '☬', 1649 => '☭', 1650 => '☮', 1651 => '☯', 1652 => '☸', 1653 => '☹', 1654 => '☺', 1655 => '☻', 1656 => '☼', 1657 => '☽', 1658 => '☾', 1659 => '☿', 1660 => '♀', 1661 => '♁', 1662 => '♂', 1663 => '♃', 1664 => '♄', 1665 => '♅', 1666 => '♆', 1667 => '♇', 1668 => '♈', 1669 => '♉', 1670 => '♊', 1671 => '♋', 1672 => '♌', 1673 => '♍', 1674 => '♎', 1675 => '♏', 1676 => '♐', 1677 => '♑', 1678 => '♒', 1679 => '♓', 1680 => '♔', 1681 => '♕', 1682 => '♖', 1683 => '♗', 1684 => '♘', 1685 => '♙', 1686 => '♚', 1687 => '♛', 1688 => '♜', 1689 => '♝', 1690 => '♞', 1691 => '♟', 1692 => '♠', 1693 => '♡', 1694 => '♢', 1695 => '♣', 1696 => '♤', 1697 => '♥', 1698 => '♦', 1699 => '♧', 1700 => '♨', 1701 => '♩', 1702 => '♪', 1703 => '♫', 1704 => '♬', 1705 => '♰', 1706 => '♱', 1707 => '♲', 1708 => '♳', 1709 => '♴', 1710 => '♵', 1711 => '♶', 1712 => '♷', 1713 => '♸', 1714 => '♹', 1715 => '♺', 1716 => '♻', 1717 => '♼', 1718 => '♽', 1719 => '♾', 1720 => '♿', 1721 => '⚀', 1722 => '⚁', 1723 => '⚂', 1724 => '⚃', 1725 => '⚄', 1726 => '⚅', 1727 => '⚆', 1728 => '⚇', 1729 => '⚈', 1730 => '⚉', 1731 => '⚐', 1732 => '⚑', 1733 => '⚒', 1734 => '⚓', 1735 => '⚔', 1736 => '⚕', 1737 => '⚖', 1738 => '⚗', 1739 => '⚘', 1740 => '⚙', 1741 => '⚚', 1742 => '⚛', 1743 => '⚜', 1744 => '⚝', 1745 => '⚞', 1746 => '⚟', 1747 => '⚠', 1748 => '⚡', 1749 => '⚢', 1750 => '⚣', 1751 => '⚤', 1752 => '⚥', 1753 => '⚦', 1754 => '⚧', 1755 => '⚨', 1756 => '⚩', 1757 => '⚪', 1758 => '⚫', 1759 => '⚬', 1760 => '⚭', 1761 => '⚮', 1762 => '⚯', 1763 => '⚰', 1764 => '⚱', 1765 => '⚲', 1766 => '⚳', 1767 => '⚴', 1768 => '⚵', 1769 => '⚶', 1770 => '⚷', 1771 => '⚸', 1772 => '⚹', 1773 => '⚺', 1774 => '⚻', 1775 => '⚼', 1776 => '⚽', 1777 => '⚾', 1778 => '⚿', 1779 => '⛀', 1780 => '⛁', 1781 => '⛂', 1782 => '⛃', 1783 => '⛄', 1784 => '⛅', 1785 => '⛆', 1786 => '⛇', 1787 => '⛈', 1788 => '⛉', 1789 => '⛊', 1790 => '⛋', 1791 => '⛌', 1792 => '⛍', 1793 => '⛎', 1794 
=> '⛏', 1795 => '⛐', 1796 => '⛑', 1797 => '⛒', 1798 => '⛓', 1799 => '⛔', 1800 => '⛕', 1801 => '⛖', 1802 => '⛗', 1803 => '⛘', 1804 => '⛙', 1805 => '⛚', 1806 => '⛛', 1807 => '⛜', 1808 => '⛝', 1809 => '⛞', 1810 => '⛟', 1811 => '⛠', 1812 => '⛡', 1813 => '⛢', 1814 => '⛣', 1815 => '⛤', 1816 => '⛥', 1817 => '⛦', 1818 => '⛧', 1819 => '⛨', 1820 => '⛩', 1821 => '⛪', 1822 => '⛫', 1823 => '⛬', 1824 => '⛭', 1825 => '⛮', 1826 => '⛯', 1827 => '⛰', 1828 => '⛱', 1829 => '⛲', 1830 => '⛳', 1831 => '⛴', 1832 => '⛵', 1833 => '⛶', 1834 => '⛷', 1835 => '⛸', 1836 => '⛹', 1837 => '⛺', 1838 => '⛻', 1839 => '⛼', 1840 => '⛽', 1841 => '⛾', 1842 => '⛿', 1843 => '✁', 1844 => '✂', 1845 => '✃', 1846 => '✄', 1847 => '✅', 1848 => '✆', 1849 => '✇', 1850 => '✈', 1851 => '✉', 1852 => '✊', 1853 => '✋', 1854 => '✌', 1855 => '✍', 1856 => '✎', 1857 => '✏', 1858 => '✐', 1859 => '✑', 1860 => '✒', 1861 => '✓', 1862 => '✔', 1863 => '✕', 1864 => '✖', 1865 => '✗', 1866 => '✘', 1867 => '✙', 1868 => '✚', 1869 => '✛', 1870 => '✜', 1871 => '✝', 1872 => '✞', 1873 => '✟', 1874 => '✠', 1875 => '✡', 1876 => '✢', 1877 => '✣', 1878 => '✤', 1879 => '✥', 1880 => '✦', 1881 => '✧', 1882 => '✨', 1883 => '✩', 1884 => '✪', 1885 => '✫', 1886 => '✬', 1887 => '✭', 1888 => '✮', 1889 => '✯', 1890 => '✰', 1891 => '✱', 1892 => '✲', 1893 => '✳', 1894 => '✴', 1895 => '✵', 1896 => '✶', 1897 => '✷', 1898 => '✸', 1899 => '✹', 1900 => '✺', 1901 => '✻', 1902 => '✼', 1903 => '✽', 1904 => '✾', 1905 => '✿', 1906 => '❀', 1907 => '❁', 1908 => '❂', 1909 => '❃', 1910 => '❄', 1911 => '❅', 1912 => '❆', 1913 => '❇', 1914 => '❈', 1915 => '❉', 1916 => '❊', 1917 => '❋', 1918 => '❌', 1919 => '❍', 1920 => '❎', 1921 => '❏', 1922 => '❐', 1923 => '❑', 1924 => '❒', 1925 => '❓', 1926 => '❔', 1927 => '❕', 1928 => '❖', 1929 => '❗', 1930 => '❘', 1931 => '❙', 1932 => '❚', 1933 => '❛', 1934 => '❜', 1935 => '❝', 1936 => '❞', 1937 => '❟', 1938 => '❠', 1939 => '❡', 1940 => '❢', 1941 => '❣', 1942 => '❤', 1943 => '❥', 1944 => '❦', 1945 => '❧', 1946 => '❨', 1947 => '❩', 
1948 => '❪', 1949 => '❫', 1950 => '❬', 1951 => '❭', 1952 => '❮', 1953 => '❯', 1954 => '❰', 1955 => '❱', 1956 => '❲', 1957 => '❳', 1958 => '❴', 1959 => '❵', 1960 => '➔', 1961 => '➕', 1962 => '➖', 1963 => '➗', 1964 => '➘', 1965 => '➙', 1966 => '➚', 1967 => '➛', 1968 => '➜', 1969 => '➝', 1970 => '➞', 1971 => '➟', 1972 => '➠', 1973 => '➡', 1974 => '➢', 1975 => '➣', 1976 => '➤', 1977 => '➥', 1978 => '➦', 1979 => '➧', 1980 => '➨', 1981 => '➩', 1982 => '➪', 1983 => '➫', 1984 => '➬', 1985 => '➭', 1986 => '➮', 1987 => '➯', 1988 => '➰', 1989 => '➱', 1990 => '➲', 1991 => '➳', 1992 => '➴', 1993 => '➵', 1994 => '➶', 1995 => '➷', 1996 => '➸', 1997 => '➹', 1998 => '➺', 1999 => '➻', 2000 => '➼', 2001 => '➽', 2002 => '➾', 2003 => '➿', 2004 => '⟀', 2005 => '⟁', 2006 => '⟂', 2007 => '⟃', 2008 => '⟄', 2009 => '⟅', 2010 => '⟆', 2011 => '⟇', 2012 => '⟈', 2013 => '⟉', 2014 => '⟊', 2015 => '⟌', 2016 => '⟎', 2017 => '⟏', 2018 => '⟐', 2019 => '⟑', 2020 => '⟒', 2021 => '⟓', 2022 => '⟔', 2023 => '⟕', 2024 => '⟖', 2025 => '⟗', 2026 => '⟘', 2027 => '⟙', 2028 => '⟚', 2029 => '⟛', 2030 => '⟜', 2031 => '⟝', 2032 => '⟞', 2033 => '⟟', 2034 => '⟠', 2035 => '⟡', 2036 => '⟢', 2037 => '⟣', 2038 => '⟤', 2039 => '⟥', 2040 => '⟦', 2041 => '⟧', 2042 => '⟨', 2043 => '⟩', 2044 => '⟪', 2045 => '⟫', 2046 => '⟰', 2047 => '⟱', 2048 => '⟲', 2049 => '⟳', 2050 => '⟴', 2051 => '⟵', 2052 => '⟶', 2053 => '⟷', 2054 => '⟸', 2055 => '⟹', 2056 => '⟺', 2057 => '⟻', 2058 => '⟼', 2059 => '⟽', 2060 => '⟾', 2061 => '⟿', 2062 => '⤀', 2063 => '⤁', 2064 => '⤂', 2065 => '⤃', 2066 => '⤄', 2067 => '⤅', 2068 => '⤆', 2069 => '⤇', 2070 => '⤈', 2071 => '⤉', 2072 => '⤊', 2073 => '⤋', 2074 => '⤌', 2075 => '⤍', 2076 => '⤎', 2077 => '⤏', 2078 => '⤐', 2079 => '⤑', 2080 => '⤒', 2081 => '⤓', 2082 => '⤔', 2083 => '⤕', 2084 => '⤖', 2085 => '⤗', 2086 => '⤘', 2087 => '⤙', 2088 => '⤚', 2089 => '⤛', 2090 => '⤜', 2091 => '⤝', 2092 => '⤞', 2093 => '⤟', 2094 => '⤠', 2095 => '⤡', 2096 => '⤢', 2097 => '⤣', 2098 => '⤤', 2099 => '⤥', 2100 => '⤦', 2101 => 
'⤧', 2102 => '⤨', 2103 => '⤩', 2104 => '⤪', 2105 => '⤫', 2106 => '⤬', 2107 => '⤭', 2108 => '⤮', 2109 => '⤯', 2110 => '⤰', 2111 => '⤱', 2112 => '⤲', 2113 => '⤳', 2114 => '⤴', 2115 => '⤵', 2116 => '⤶', 2117 => '⤷', 2118 => '⤸', 2119 => '⤹', 2120 => '⤺', 2121 => '⤻', 2122 => '⤼', 2123 => '⤽', 2124 => '⤾', 2125 => '⤿', 2126 => '⥀', 2127 => '⥁', 2128 => '⥂', 2129 => '⥃', 2130 => '⥄', 2131 => '⥅', 2132 => '⥆', 2133 => '⥇', 2134 => '⥈', 2135 => '⥉', 2136 => '⥊', 2137 => '⥋', 2138 => '⥌', 2139 => '⥍', 2140 => '⥎', 2141 => '⥏', 2142 => '⥐', 2143 => '⥑', 2144 => '⥒', 2145 => '⥓', 2146 => '⥔', 2147 => '⥕', 2148 => '⥖', 2149 => '⥗', 2150 => '⥘', 2151 => '⥙', 2152 => '⥚', 2153 => '⥛', 2154 => '⥜', 2155 => '⥝', 2156 => '⥞', 2157 => '⥟', 2158 => '⥠', 2159 => '⥡', 2160 => '⥢', 2161 => '⥣', 2162 => '⥤', 2163 => '⥥', 2164 => '⥦', 2165 => '⥧', 2166 => '⥨', 2167 => '⥩', 2168 => '⥪', 2169 => '⥫', 2170 => '⥬', 2171 => '⥭', 2172 => '⥮', 2173 => '⥯', 2174 => '⥰', 2175 => '⥱', 2176 => '⥲', 2177 => '⥳', 2178 => '⥴', 2179 => '⥵', 2180 => '⥶', 2181 => '⥷', 2182 => '⥸', 2183 => '⥹', 2184 => '⥺', 2185 => '⥻', 2186 => '⥼', 2187 => '⥽', 2188 => '⥾', 2189 => '⥿', 2190 => '⦀', 2191 => '⦁', 2192 => '⦂', 2193 => '⦙', 2194 => '⦚', 2195 => '⦛', 2196 => '⦜', 2197 => '⦝', 2198 => '⦞', 2199 => '⦟', 2200 => '⦠', 2201 => '⦡', 2202 => '⦢', 2203 => '⦣', 2204 => '⦤', 2205 => '⦥', 2206 => '⦦', 2207 => '⦧', 2208 => '⦨', 2209 => '⦩', 2210 => '⦪', 2211 => '⦫', 2212 => '⦬', 2213 => '⦭', 2214 => '⦮', 2215 => '⦯', 2216 => '⦰', 2217 => '⦱', 2218 => '⦲', 2219 => '⦳', 2220 => '⦴', 2221 => '⦵', 2222 => '⦶', 2223 => '⦷', 2224 => '⦸', 2225 => '⦹', 2226 => '⦺', 2227 => '⦻', 2228 => '⦼', 2229 => '⦽', 2230 => '⦾', 2231 => '⦿', 2232 => '⧀', 2233 => '⧁', 2234 => '⧂', 2235 => '⧃', 2236 => '⧄', 2237 => '⧅', 2238 => '⧆', 2239 => '⧇', 2240 => '⧈', 2241 => '⧉', 2242 => '⧊', 2243 => '⧋', 2244 => '⧌', 2245 => '⧍', 2246 => '⧎', 2247 => '⧏', 2248 => '⧐', 2249 => '⧑', 2250 => '⧒', 2251 => '⧓', 2252 => '⧔', 2253 => '⧕', 2254 => '⧖', 2255 
=> '⧗', 2256 => '⧘', 2257 => '⧙', 2258 => '⧚', 2259 => '⧛', 2260 => '⧜', 2261 => '⧝', 2262 => '⧞', 2263 => '⧟', 2264 => '⧠', 2265 => '⧡', 2266 => '⧢', 2267 => '⧣', 2268 => '⧤', 2269 => '⧥', 2270 => '⧦', 2271 => '⧧', 2272 => '⧨', 2273 => '⧩', 2274 => '⧪', 2275 => '⧫', 2276 => '⧬', 2277 => '⧭', 2278 => '⧮', 2279 => '⧯', 2280 => '⧰', 2281 => '⧱', 2282 => '⧲', 2283 => '⧳', 2284 => '⧴', 2285 => '⧵', 2286 => '⧶', 2287 => '⧷', 2288 => '⧸', 2289 => '⧹', 2290 => '⧺', 2291 => '⧻', 2292 => '⧾', 2293 => '⧿', 2294 => '⨀', 2295 => '⨁', 2296 => '⨂', 2297 => '⨃', 2298 => '⨄', 2299 => '⨅', 2300 => '⨆', 2301 => '⨇', 2302 => '⨈', 2303 => '⨉', 2304 => '⨊', 2305 => '⨋', 2306 => '⨍', 2307 => '⨎', 2308 => '⨏', 2309 => '⨐', 2310 => '⨑', 2311 => '⨒', 2312 => '⨓', 2313 => '⨔', 2314 => '⨕', 2315 => '⨖', 2316 => '⨗', 2317 => '⨘', 2318 => '⨙', 2319 => '⨚', 2320 => '⨛', 2321 => '⨜', 2322 => '⨝', 2323 => '⨞', 2324 => '⨟', 2325 => '⨠', 2326 => '⨡', 2327 => '⨢', 2328 => '⨣', 2329 => '⨤', 2330 => '⨥', 2331 => '⨦', 2332 => '⨧', 2333 => '⨨', 2334 => '⨩', 2335 => '⨪', 2336 => '⨫', 2337 => '⨬', 2338 => '⨭', 2339 => '⨮', 2340 => '⨯', 2341 => '⨰', 2342 => '⨱', 2343 => '⨲', 2344 => '⨳', 2345 => '⨴', 2346 => '⨵', 2347 => '⨶', 2348 => '⨷', 2349 => '⨸', 2350 => '⨹', 2351 => '⨺', 2352 => '⨻', 2353 => '⨼', 2354 => '⨽', 2355 => '⨾', 2356 => '⨿', 2357 => '⩀', 2358 => '⩁', 2359 => '⩂', 2360 => '⩃', 2361 => '⩄', 2362 => '⩅', 2363 => '⩆', 2364 => '⩇', 2365 => '⩈', 2366 => '⩉', 2367 => '⩊', 2368 => '⩋', 2369 => '⩌', 2370 => '⩍', 2371 => '⩎', 2372 => '⩏', 2373 => '⩐', 2374 => '⩑', 2375 => '⩒', 2376 => '⩓', 2377 => '⩔', 2378 => '⩕', 2379 => '⩖', 2380 => '⩗', 2381 => '⩘', 2382 => '⩙', 2383 => '⩚', 2384 => '⩛', 2385 => '⩜', 2386 => '⩝', 2387 => '⩞', 2388 => '⩟', 2389 => '⩠', 2390 => '⩡', 2391 => '⩢', 2392 => '⩣', 2393 => '⩤', 2394 => '⩥', 2395 => '⩦', 2396 => '⩧', 2397 => '⩨', 2398 => '⩩', 2399 => '⩪', 2400 => '⩫', 2401 => '⩬', 2402 => '⩭', 2403 => '⩮', 2404 => '⩯', 2405 => '⩰', 2406 => '⩱', 2407 => '⩲', 2408 => '⩳', 
2409 => '⩷', 2410 => '⩸', 2411 => '⩹', 2412 => '⩺', 2413 => '⩻', 2414 => '⩼', 2415 => '⩽', 2416 => '⩾', 2417 => '⩿', 2418 => '⪀', 2419 => '⪁', 2420 => '⪂', 2421 => '⪃', 2422 => '⪄', 2423 => '⪅', 2424 => '⪆', 2425 => '⪇', 2426 => '⪈', 2427 => '⪉', 2428 => '⪊', 2429 => '⪋', 2430 => '⪌', 2431 => '⪍', 2432 => '⪎', 2433 => '⪏', 2434 => '⪐', 2435 => '⪑', 2436 => '⪒', 2437 => '⪓', 2438 => '⪔', 2439 => '⪕', 2440 => '⪖', 2441 => '⪗', 2442 => '⪘', 2443 => '⪙', 2444 => '⪚', 2445 => '⪛', 2446 => '⪜', 2447 => '⪝', 2448 => '⪞', 2449 => '⪟', 2450 => '⪠', 2451 => '⪡', 2452 => '⪢', 2453 => '⪣', 2454 => '⪤', 2455 => '⪥', 2456 => '⪦', 2457 => '⪧', 2458 => '⪨', 2459 => '⪩', 2460 => '⪪', 2461 => '⪫', 2462 => '⪬', 2463 => '⪭', 2464 => '⪮', 2465 => '⪯', 2466 => '⪰', 2467 => '⪱', 2468 => '⪲', 2469 => '⪳', 2470 => '⪴', 2471 => '⪵', 2472 => '⪶', 2473 => '⪷', 2474 => '⪸', 2475 => '⪹', 2476 => '⪺', 2477 => '⪻', 2478 => '⪼', 2479 => '⪽', 2480 => '⪾', 2481 => '⪿', 2482 => '⫀', 2483 => '⫁', 2484 => '⫂', 2485 => '⫃', 2486 => '⫄', 2487 => '⫅', 2488 => '⫆', 2489 => '⫇', 2490 => '⫈', 2491 => '⫉', 2492 => '⫊', 2493 => '⫋', 2494 => '⫌', 2495 => '⫍', 2496 => '⫎', 2497 => '⫏', 2498 => '⫐', 2499 => '⫑', 2500 => '⫒', 2501 => '⫓', 2502 => '⫔', 2503 => '⫕', 2504 => '⫖', 2505 => '⫗', 2506 => '⫘', 2507 => '⫙', 2508 => '⫚', 2509 => '⫛', 2510 => '⫝', 2511 => '⫞', 2512 => '⫟', 2513 => '⫠', 2514 => '⫡', 2515 => '⫢', 2516 => '⫣', 2517 => '⫤', 2518 => '⫥', 2519 => '⫦', 2520 => '⫧', 2521 => '⫨', 2522 => '⫩', 2523 => '⫪', 2524 => '⫫', 2525 => '⫬', 2526 => '⫭', 2527 => '⫮', 2528 => '⫯', 2529 => '⫰', 2530 => '⫱', 2531 => '⫲', 2532 => '⫳', 2533 => '⫴', 2534 => '⫵', 2535 => '⫶', 2536 => '⫷', 2537 => '⫸', 2538 => '⫹', 2539 => '⫺', 2540 => '⫻', 2541 => '⫼', 2542 => '⫽', 2543 => '⫾', 2544 => '⫿', 2545 => '⬀', 2546 => '⬁', 2547 => '⬂', 2548 => '⬃', 2549 => '⬄', 2550 => '⬅', 2551 => '⬆', 2552 => '⬇', 2553 => '⬈', 2554 => '⬉', 2555 => '⬊', 2556 => '⬋', 2557 => '⬌', 2558 => '⬍', 2559 => '⬎', 2560 => '⬏', 2561 => '⬐', 2562 => 
'⬑', 2563 => '⬒', 2564 => '⬓', 2565 => '⬔', 2566 => '⬕', 2567 => '⬖', 2568 => '⬗', 2569 => '⬘', 2570 => '⬙', 2571 => '⬚', 2572 => '⬛', 2573 => '⬜', 2574 => '⬝', 2575 => '⬞', 2576 => '⬟', 2577 => '⬠', 2578 => '⬡', 2579 => '⬢', 2580 => '⬣', 2581 => '⬤', 2582 => '⬥', 2583 => '⬦', 2584 => '⬧', 2585 => '⬨', 2586 => '⬩', 2587 => '⬪', 2588 => '⬫', 2589 => '⬬', 2590 => '⬭', 2591 => '⬮', 2592 => '⬯', 2593 => '⬰', 2594 => '⬱', 2595 => '⬲', 2596 => '⬳', 2597 => '⬴', 2598 => '⬵', 2599 => '⬶', 2600 => '⬷', 2601 => '⬸', 2602 => '⬹', 2603 => '⬺', 2604 => '⬻', 2605 => '⬼', 2606 => '⬽', 2607 => '⬾', 2608 => '⬿', 2609 => '⭀', 2610 => '⭁', 2611 => '⭂', 2612 => '⭃', 2613 => '⭄', 2614 => '⭅', 2615 => '⭆', 2616 => '⭇', 2617 => '⭈', 2618 => '⭉', 2619 => '⭊', 2620 => '⭋', 2621 => '⭌', 2622 => '⭐', 2623 => '⭑', 2624 => '⭒', 2625 => '⭓', 2626 => '⭔', 2627 => '⭕', 2628 => '⭖', 2629 => '⭗', 2630 => '⭘', 2631 => '⭙', 2632 => '⳥', 2633 => '⳦', 2634 => '⳧', 2635 => '⳨', 2636 => '⳩', 2637 => '⳪', 2638 => '⠀', 2639 => '⠁', 2640 => '⠂', 2641 => '⠃', 2642 => '⠄', 2643 => '⠅', 2644 => '⠆', 2645 => '⠇', 2646 => '⠈', 2647 => '⠉', 2648 => '⠊', 2649 => '⠋', 2650 => '⠌', 2651 => '⠍', 2652 => '⠎', 2653 => '⠏', 2654 => '⠐', 2655 => '⠑', 2656 => '⠒', 2657 => '⠓', 2658 => '⠔', 2659 => '⠕', 2660 => '⠖', 2661 => '⠗', 2662 => '⠘', 2663 => '⠙', 2664 => '⠚', 2665 => '⠛', 2666 => '⠜', 2667 => '⠝', 2668 => '⠞', 2669 => '⠟', 2670 => '⠠', 2671 => '⠡', 2672 => '⠢', 2673 => '⠣', 2674 => '⠤', 2675 => '⠥', 2676 => '⠦', 2677 => '⠧', 2678 => '⠨', 2679 => '⠩', 2680 => '⠪', 2681 => '⠫', 2682 => '⠬', 2683 => '⠭', 2684 => '⠮', 2685 => '⠯', 2686 => '⠰', 2687 => '⠱', 2688 => '⠲', 2689 => '⠳', 2690 => '⠴', 2691 => '⠵', 2692 => '⠶', 2693 => '⠷', 2694 => '⠸', 2695 => '⠹', 2696 => '⠺', 2697 => '⠻', 2698 => '⠼', 2699 => '⠽', 2700 => '⠾', 2701 => '⠿', 2702 => '⡀', 2703 => '⡁', 2704 => '⡂', 2705 => '⡃', 2706 => '⡄', 2707 => '⡅', 2708 => '⡆', 2709 => '⡇', 2710 => '⡈', 2711 => '⡉', 2712 => '⡊', 2713 => '⡋', 2714 => '⡌', 2715 => '⡍', 2716 
=> '⡎', 2717 => '⡏', 2718 => '⡐', 2719 => '⡑', 2720 => '⡒', 2721 => '⡓', 2722 => '⡔', 2723 => '⡕', 2724 => '⡖', 2725 => '⡗', 2726 => '⡘', 2727 => '⡙', 2728 => '⡚', 2729 => '⡛', 2730 => '⡜', 2731 => '⡝', 2732 => '⡞', 2733 => '⡟', 2734 => '⡠', 2735 => '⡡', 2736 => '⡢', 2737 => '⡣', 2738 => '⡤', 2739 => '⡥', 2740 => '⡦', 2741 => '⡧', 2742 => '⡨', 2743 => '⡩', 2744 => '⡪', 2745 => '⡫', 2746 => '⡬', 2747 => '⡭', 2748 => '⡮', 2749 => '⡯', 2750 => '⡰', 2751 => '⡱', 2752 => '⡲', 2753 => '⡳', 2754 => '⡴', 2755 => '⡵', 2756 => '⡶', 2757 => '⡷', 2758 => '⡸', 2759 => '⡹', 2760 => '⡺', 2761 => '⡻', 2762 => '⡼', 2763 => '⡽', 2764 => '⡾', 2765 => '⡿', 2766 => '⢀', 2767 => '⢁', 2768 => '⢂', 2769 => '⢃', 2770 => '⢄', 2771 => '⢅', 2772 => '⢆', 2773 => '⢇', 2774 => '⢈', 2775 => '⢉', 2776 => '⢊', 2777 => '⢋', 2778 => '⢌', 2779 => '⢍', 2780 => '⢎', 2781 => '⢏', 2782 => '⢐', 2783 => '⢑', 2784 => '⢒', 2785 => '⢓', 2786 => '⢔', 2787 => '⢕', 2788 => '⢖', 2789 => '⢗', 2790 => '⢘', 2791 => '⢙', 2792 => '⢚', 2793 => '⢛', 2794 => '⢜', 2795 => '⢝', 2796 => '⢞', 2797 => '⢟', 2798 => '⢠', 2799 => '⢡', 2800 => '⢢', 2801 => '⢣', 2802 => '⢤', 2803 => '⢥', 2804 => '⢦', 2805 => '⢧', 2806 => '⢨', 2807 => '⢩', 2808 => '⢪', 2809 => '⢫', 2810 => '⢬', 2811 => '⢭', 2812 => '⢮', 2813 => '⢯', 2814 => '⢰', 2815 => '⢱', 2816 => '⢲', 2817 => '⢳', 2818 => '⢴', 2819 => '⢵', 2820 => '⢶', 2821 => '⢷', 2822 => '⢸', 2823 => '⢹', 2824 => '⢺', 2825 => '⢻', 2826 => '⢼', 2827 => '⢽', 2828 => '⢾', 2829 => '⢿', 2830 => '⣀', 2831 => '⣁', 2832 => '⣂', 2833 => '⣃', 2834 => '⣄', 2835 => '⣅', 2836 => '⣆', 2837 => '⣇', 2838 => '⣈', 2839 => '⣉', 2840 => '⣊', 2841 => '⣋', 2842 => '⣌', 2843 => '⣍', 2844 => '⣎', 2845 => '⣏', 2846 => '⣐', 2847 => '⣑', 2848 => '⣒', 2849 => '⣓', 2850 => '⣔', 2851 => '⣕', 2852 => '⣖', 2853 => '⣗', 2854 => '⣘', 2855 => '⣙', 2856 => '⣚', 2857 => '⣛', 2858 => '⣜', 2859 => '⣝', 2860 => '⣞', 2861 => '⣟', 2862 => '⣠', 2863 => '⣡', 2864 => '⣢', 2865 => '⣣', 2866 => '⣤', 2867 => '⣥', 2868 => '⣦', 2869 => '⣧', 
2870 => '⣨', 2871 => '⣩', 2872 => '⣪', 2873 => '⣫', 2874 => '⣬', 2875 => '⣭', 2876 => '⣮', 2877 => '⣯', 2878 => '⣰', 2879 => '⣱', 2880 => '⣲', 2881 => '⣳', 2882 => '⣴', 2883 => '⣵', 2884 => '⣶', 2885 => '⣷', 2886 => '⣸', 2887 => '⣹', 2888 => '⣺', 2889 => '⣻', 2890 => '⣼', 2891 => '⣽', 2892 => '⣾', 2893 => '⣿', 2894 => '⚊', 2895 => '⚋', 2896 => '⚌', 2897 => '⚍', 2898 => '⚎', 2899 => '⚏', 2900 => '☰', 2901 => '☱', 2902 => '☲', 2903 => '☳', 2904 => '☴', 2905 => '☵', 2906 => '☶', 2907 => '☷', 2908 => '䷀', 2909 => '䷁', 2910 => '䷂', 2911 => '䷃', 2912 => '䷄', 2913 => '䷅', 2914 => '䷆', 2915 => '䷇', 2916 => '䷈', 2917 => '䷉', 2918 => '䷊', 2919 => '䷋', 2920 => '䷌', 2921 => '䷍', 2922 => '䷎', 2923 => '䷏', 2924 => '䷐', 2925 => '䷑', 2926 => '䷒', 2927 => '䷓', 2928 => '䷔', 2929 => '䷕', 2930 => '䷖', 2931 => '䷗', 2932 => '䷘', 2933 => '䷙', 2934 => '䷚', 2935 => '䷛', 2936 => '䷜', 2937 => '䷝', 2938 => '䷞', 2939 => '䷟', 2940 => '䷠', 2941 => '䷡', 2942 => '䷢', 2943 => '䷣', 2944 => '䷤', 2945 => '䷥', 2946 => '䷦', 2947 => '䷧', 2948 => '䷨', 2949 => '䷩', 2950 => '䷪', 2951 => '䷫', 2952 => '䷬', 2953 => '䷭', 2954 => '䷮', 2955 => '䷯', 2956 => '䷰', 2957 => '䷱', 2958 => '䷲', 2959 => '䷳', 2960 => '䷴', 2961 => '䷵', 2962 => '䷶', 2963 => '䷷', 2964 => '䷸', 2965 => '䷹', 2966 => '䷺', 2967 => '䷻', 2968 => '䷼', 2969 => '䷽', 2970 => '䷾', 2971 => '䷿', 2972 => '𝌀', 2973 => '𝌁', 2974 => '𝌂', 2975 => '𝌃', 2976 => '𝌄', 2977 => '𝌅', 2978 => '𝌆', 2979 => '𝌇', 2980 => '𝌈', 2981 => '𝌉', 2982 => '𝌊', 2983 => '𝌋', 2984 => '𝌌', 2985 => '𝌍', 2986 => '𝌎', 2987 => '𝌏', 2988 => '𝌐', 2989 => '𝌑', 2990 => '𝌒', 2991 => '𝌓', 2992 => '𝌔', 2993 => '𝌕', 2994 => '𝌖', 2995 => '𝌗', 2996 => '𝌘', 2997 => '𝌙', 2998 => '𝌚', 2999 => '𝌛', 3000 => '𝌜', 3001 => '𝌝', 3002 => '𝌞', 3003 => '𝌟', 3004 => '𝌠', 3005 => '𝌡', 3006 => '𝌢', 3007 => '𝌣', 3008 => '𝌤', 3009 => '𝌥', 3010 => '𝌦', 3011 => '𝌧', 3012 => '𝌨', 3013 => '𝌩', 3014 => '𝌪', 3015 => '𝌫', 3016 => '𝌬', 3017 => '𝌭', 3018 => '𝌮', 3019 => '𝌯', 3020 => '𝌰', 3021 => '𝌱', 3022 => '𝌲', 3023 => 
'𝌳', 3024 => '𝌴', 3025 => '𝌵', 3026 => '𝌶', 3027 => '𝌷', 3028 => '𝌸', 3029 => '𝌹', 3030 => '𝌺', 3031 => '𝌻', 3032 => '𝌼', 3033 => '𝌽', 3034 => '𝌾', 3035 => '𝌿', 3036 => '𝍀', 3037 => '𝍁', 3038 => '𝍂', 3039 => '𝍃', 3040 => '𝍄', 3041 => '𝍅', 3042 => '𝍆', 3043 => '𝍇', 3044 => '𝍈', 3045 => '𝍉', 3046 => '𝍊', 3047 => '𝍋', 3048 => '𝍌', 3049 => '𝍍', 3050 => '𝍎', 3051 => '𝍏', 3052 => '𝍐', 3053 => '𝍑', 3054 => '𝍒', 3055 => '𝍓', 3056 => '𝍔', 3057 => '𝍕', 3058 => '𝍖', 3059 => '꒐', 3060 => '꒑', 3061 => '꒒', 3062 => '꒓', 3063 => '꒔', 3064 => '꒕', 3065 => '꒖', 3066 => '꒗', 3067 => '꒘', 3068 => '꒙', 3069 => '꒚', 3070 => '꒛', 3071 => '꒜', 3072 => '꒝', 3073 => '꒞', 3074 => '꒟', 3075 => '꒠', 3076 => '꒡', 3077 => '꒢', 3078 => '꒣', 3079 => '꒤', 3080 => '꒥', 3081 => '꒦', 3082 => '꒧', 3083 => '꒨', 3084 => '꒩', 3085 => '꒪', 3086 => '꒫', 3087 => '꒬', 3088 => '꒭', 3089 => '꒮', 3090 => '꒯', 3091 => '꒰', 3092 => '꒱', 3093 => '꒲', 3094 => '꒳', 3095 => '꒴', 3096 => '꒵', 3097 => '꒶', 3098 => '꒷', 3099 => '꒸', 3100 => '꒹', 3101 => '꒺', 3102 => '꒻', 3103 => '꒼', 3104 => '꒽', 3105 => '꒾', 3106 => '꒿', 3107 => '꓀', 3108 => '꓁', 3109 => '꓂', 3110 => '꓃', 3111 => '꓄', 3112 => '꓅', 3113 => '꓆', 3114 => '𐄷', 3115 => '𐄸', 3116 => '𐄹', 3117 => '𐄺', 3118 => '𐄻', 3119 => '𐄼', 3120 => '𐄽', 3121 => '𐄾', 3122 => '𐄿', 3123 => '𐅹', 3124 => '𐅺', 3125 => '𐅻', 3126 => '𐅼', 3127 => '𐅽', 3128 => '𐅾', 3129 => '𐅿', 3130 => '𐆀', 3131 => '𐆁', 3132 => '𐆂', 3133 => '𐆃', 3134 => '𐆄', 3135 => '𐆅', 3136 => '𐆆', 3137 => '𐆇', 3138 => '𐆈', 3139 => '𐆉', 3140 => '𐆐', 3141 => '𐆑', 3142 => '𐆒', 3143 => '𐆓', 3144 => '𐆔', 3145 => '𐆕', 3146 => '𐆖', 3147 => '𐆗', 3148 => '𐆘', 3149 => '𐆙', 3150 => '𐆚', 3151 => '𐆛', 3152 => '𐇐', 3153 => '𐇑', 3154 => '𐇒', 3155 => '𐇓', 3156 => '𐇔', 3157 => '𐇕', 3158 => '𐇖', 3159 => '𐇗', 3160 => '𐇘', 3161 => '𐇙', 3162 => '𐇚', 3163 => '𐇛', 3164 => '𐇜', 3165 => '𐇝', 3166 => '𐇞', 3167 => '𐇟', 3168 => '𐇠', 3169 => '𐇡', 3170 => '𐇢', 3171 => '𐇣', 3172 => '𐇤', 3173 => '𐇥', 3174 => '𐇦', 3175 => '𐇧', 3176 => '𐇨', 3177 
=> '𐇩', 3178 => '𐇪', 3179 => '𐇫', 3180 => '𐇬', 3181 => '𐇭', 3182 => '𐇮', 3183 => '𐇯', 3184 => '𐇰', 3185 => '𐇱', 3186 => '𐇲', 3187 => '𐇳', 3188 => '𐇴', 3189 => '𐇵', 3190 => '𐇶', 3191 => '𐇷', 3192 => '𐇸', 3193 => '𐇹', 3194 => '𐇺', 3195 => '𐇻', 3196 => '𐇼', 3197 => '𝀀', 3198 => '𝀁', 3199 => '𝀂', 3200 => '𝀃', 3201 => '𝀄', 3202 => '𝀅', 3203 => '𝀆', 3204 => '𝀇', 3205 => '𝀈', 3206 => '𝀉', 3207 => '𝀊', 3208 => '𝀋', 3209 => '𝀌', 3210 => '𝀍', 3211 => '𝀎', 3212 => '𝀏', 3213 => '𝀐', 3214 => '𝀑', 3215 => '𝀒', 3216 => '𝀓', 3217 => '𝀔', 3218 => '𝀕', 3219 => '𝀖', 3220 => '𝀗', 3221 => '𝀘', 3222 => '𝀙', 3223 => '𝀚', 3224 => '𝀛', 3225 => '𝀜', 3226 => '𝀝', 3227 => '𝀞', 3228 => '𝀟', 3229 => '𝀠', 3230 => '𝀡', 3231 => '𝀢', 3232 => '𝀣', 3233 => '𝀤', 3234 => '𝀥', 3235 => '𝀦', 3236 => '𝀧', 3237 => '𝀨', 3238 => '𝀩', 3239 => '𝀪', 3240 => '𝀫', 3241 => '𝀬', 3242 => '𝀭', 3243 => '𝀮', 3244 => '𝀯', 3245 => '𝀰', 3246 => '𝀱', 3247 => '𝀲', 3248 => '𝀳', 3249 => '𝀴', 3250 => '𝀵', 3251 => '𝀶', 3252 => '𝀷', 3253 => '𝀸', 3254 => '𝀹', 3255 => '𝀺', 3256 => '𝀻', 3257 => '𝀼', 3258 => '𝀽', 3259 => '𝀾', 3260 => '𝀿', 3261 => '𝁀', 3262 => '𝁁', 3263 => '𝁂', 3264 => '𝁃', 3265 => '𝁄', 3266 => '𝁅', 3267 => '𝁆', 3268 => '𝁇', 3269 => '𝁈', 3270 => '𝁉', 3271 => '𝁊', 3272 => '𝁋', 3273 => '𝁌', 3274 => '𝁍', 3275 => '𝁎', 3276 => '𝁏', 3277 => '𝁐', 3278 => '𝁑', 3279 => '𝁒', 3280 => '𝁓', 3281 => '𝁔', 3282 => '𝁕', 3283 => '𝁖', 3284 => '𝁗', 3285 => '𝁘', 3286 => '𝁙', 3287 => '𝁚', 3288 => '𝁛', 3289 => '𝁜', 3290 => '𝁝', 3291 => '𝁞', 3292 => '𝁟', 3293 => '𝁠', 3294 => '𝁡', 3295 => '𝁢', 3296 => '𝁣', 3297 => '𝁤', 3298 => '𝁥', 3299 => '𝁦', 3300 => '𝁧', 3301 => '𝁨', 3302 => '𝁩', 3303 => '𝁪', 3304 => '𝁫', 3305 => '𝁬', 3306 => '𝁭', 3307 => '𝁮', 3308 => '𝁯', 3309 => '𝁰', 3310 => '𝁱', 3311 => '𝁲', 3312 => '𝁳', 3313 => '𝁴', 3314 => '𝁵', 3315 => '𝁶', 3316 => '𝁷', 3317 => '𝁸', 3318 => '𝁹', 3319 => '𝁺', 3320 => '𝁻', 3321 => '𝁼', 3322 => '𝁽', 3323 => '𝁾', 3324 => '𝁿', 3325 => '𝂀', 3326 => '𝂁', 3327 => '𝂂', 3328 => '𝂃', 3329 => '𝂄', 3330 => '𝂅', 
3331 => '𝂆', 3332 => '𝂇', 3333 => '𝂈', 3334 => '𝂉', 3335 => '𝂊', 3336 => '𝂋', 3337 => '𝂌', 3338 => '𝂍', 3339 => '𝂎', 3340 => '𝂏', 3341 => '𝂐', 3342 => '𝂑', 3343 => '𝂒', 3344 => '𝂓', 3345 => '𝂔', 3346 => '𝂕', 3347 => '𝂖', 3348 => '𝂗', 3349 => '𝂘', 3350 => '𝂙', 3351 => '𝂚', 3352 => '𝂛', 3353 => '𝂜', 3354 => '𝂝', 3355 => '𝂞', 3356 => '𝂟', 3357 => '𝂠', 3358 => '𝂡', 3359 => '𝂢', 3360 => '𝂣', 3361 => '𝂤', 3362 => '𝂥', 3363 => '𝂦', 3364 => '𝂧', 3365 => '𝂨', 3366 => '𝂩', 3367 => '𝂪', 3368 => '𝂫', 3369 => '𝂬', 3370 => '𝂭', 3371 => '𝂮', 3372 => '𝂯', 3373 => '𝂰', 3374 => '𝂱', 3375 => '𝂲', 3376 => '𝂳', 3377 => '𝂴', 3378 => '𝂵', 3379 => '𝂶', 3380 => '𝂷', 3381 => '𝂸', 3382 => '𝂹', 3383 => '𝂺', 3384 => '𝂻', 3385 => '𝂼', 3386 => '𝂽', 3387 => '𝂾', 3388 => '𝂿', 3389 => '𝃀', 3390 => '𝃁', 3391 => '𝃂', 3392 => '𝃃', 3393 => '𝃄', 3394 => '𝃅', 3395 => '𝃆', 3396 => '𝃇', 3397 => '𝃈', 3398 => '𝃉', 3399 => '𝃊', 3400 => '𝃋', 3401 => '𝃌', 3402 => '𝃍', 3403 => '𝃎', 3404 => '𝃏', 3405 => '𝃐', 3406 => '𝃑', 3407 => '𝃒', 3408 => '𝃓', 3409 => '𝃔', 3410 => '𝃕', 3411 => '𝃖', 3412 => '𝃗', 3413 => '𝃘', 3414 => '𝃙', 3415 => '𝃚', 3416 => '𝃛', 3417 => '𝃜', 3418 => '𝃝', 3419 => '𝃞', 3420 => '𝃟', 3421 => '𝃠', 3422 => '𝃡', 3423 => '𝃢', 3424 => '𝃣', 3425 => '𝃤', 3426 => '𝃥', 3427 => '𝃦', 3428 => '𝃧', 3429 => '𝃨', 3430 => '𝃩', 3431 => '𝃪', 3432 => '𝃫', 3433 => '𝃬', 3434 => '𝃭', 3435 => '𝃮', 3436 => '𝃯', 3437 => '𝃰', 3438 => '𝃱', 3439 => '𝃲', 3440 => '𝃳', 3441 => '𝃴', 3442 => '𝃵', 3443 => '𝄀', 3444 => '𝄁', 3445 => '𝄂', 3446 => '𝄃', 3447 => '𝄄', 3448 => '𝄅', 3449 => '𝄆', 3450 => '𝄇', 3451 => '𝄈', 3452 => '𝄉', 3453 => '𝄊', 3454 => '𝄋', 3455 => '𝄌', 3456 => '𝄍', 3457 => '𝄎', 3458 => '𝄏', 3459 => '𝄐', 3460 => '𝄑', 3461 => '𝄒', 3462 => '𝄓', 3463 => '𝄔', 3464 => '𝄕', 3465 => '𝄖', 3466 => '𝄗', 3467 => '𝄘', 3468 => '𝄙', 3469 => '𝄚', 3470 => '𝄛', 3471 => '𝄜', 3472 => '𝄝', 3473 => '𝄞', 3474 => '𝄟', 3475 => '𝄠', 3476 => '𝄡', 3477 => '𝄢', 3478 => '𝄣', 3479 => '𝄤', 3480 => '𝄥', 3481 => '𝄦', 3482 => '♭', 3483 => '♮', 3484 => 
'♯', 3485 => '𝄪', 3486 => '𝄫', 3487 => '𝄬', 3488 => '𝄭', 3489 => '𝄮', 3490 => '𝄯', 3491 => '𝄰', 3492 => '𝄱', 3493 => '𝄲', 3494 => '𝄳', 3495 => '𝄴', 3496 => '𝄵', 3497 => '𝄶', 3498 => '𝄷', 3499 => '𝄸', 3500 => '𝄹', 3501 => '𝄩', 3502 => '𝄺', 3503 => '𝄻', 3504 => '𝄼', 3505 => '𝄽', 3506 => '𝄾', 3507 => '𝄿', 3508 => '𝅀', 3509 => '𝅁', 3510 => '𝅂', 3511 => '𝅃', 3512 => '𝅄', 3513 => '𝅅', 3514 => '𝅆', 3515 => '𝅇', 3516 => '𝅈', 3517 => '𝅉', 3518 => '𝅊', 3519 => '𝅋', 3520 => '𝅌', 3521 => '𝅍', 3522 => '𝅎', 3523 => '𝅏', 3524 => '𝅐', 3525 => '𝅑', 3526 => '𝅒', 3527 => '𝅓', 3528 => '𝅔', 3529 => '𝅕', 3530 => '𝅖', 3531 => '𝅗', 3532 => '𝅘', 3533 => '𝅙', 3534 => '𝅚', 3535 => '𝅛', 3536 => '𝅜', 3537 => '𝅝', 3538 => '𝅪', 3539 => '𝅫', 3540 => '𝅬', 3541 => '𝆃', 3542 => '𝆄', 3543 => '𝆌', 3544 => '𝆍', 3545 => '𝆎', 3546 => '𝆏', 3547 => '𝆐', 3548 => '𝆑', 3549 => '𝆒', 3550 => '𝆓', 3551 => '𝆔', 3552 => '𝆕', 3553 => '𝆖', 3554 => '𝆗', 3555 => '𝆘', 3556 => '𝆙', 3557 => '𝆚', 3558 => '𝆛', 3559 => '𝆜', 3560 => '𝆝', 3561 => '𝆞', 3562 => '𝆟', 3563 => '𝆠', 3564 => '𝆡', 3565 => '𝆢', 3566 => '𝆣', 3567 => '𝆤', 3568 => '𝆥', 3569 => '𝆦', 3570 => '𝆧', 3571 => '𝆨', 3572 => '𝆩', 3573 => '𝆮', 3574 => '𝆯', 3575 => '𝆰', 3576 => '𝆱', 3577 => '𝆲', 3578 => '𝆳', 3579 => '𝆴', 3580 => '𝆵', 3581 => '𝆶', 3582 => '𝆷', 3583 => '𝆸', 3584 => '𝆹', 3585 => '𝆺', 3586 => '𝇁', 3587 => '𝇂', 3588 => '𝇃', 3589 => '𝇄', 3590 => '𝇅', 3591 => '𝇆', 3592 => '𝇇', 3593 => '𝇈', 3594 => '𝇉', 3595 => '𝇊', 3596 => '𝇋', 3597 => '𝇌', 3598 => '𝇍', 3599 => '𝇎', 3600 => '𝇏', 3601 => '𝇐', 3602 => '𝇑', 3603 => '𝇒', 3604 => '𝇓', 3605 => '𝇔', 3606 => '𝇕', 3607 => '𝇖', 3608 => '𝇗', 3609 => '𝇘', 3610 => '𝇙', 3611 => '𝇚', 3612 => '𝇛', 3613 => '𝇜', 3614 => '𝇝', 3615 => '𝈀', 3616 => '𝈁', 3617 => '𝈂', 3618 => '𝈃', 3619 => '𝈄', 3620 => '𝈅', 3621 => '𝈆', 3622 => '𝈇', 3623 => '𝈈', 3624 => '𝈉', 3625 => '𝈊', 3626 => '𝈋', 3627 => '𝈌', 3628 => '𝈍', 3629 => '𝈎', 3630 => '𝈏', 3631 => '𝈐', 3632 => '𝈑', 3633 => '𝈒', 3634 => '𝈓', 3635 => '𝈔', 3636 => '𝈕', 3637 => '𝈖', 3638 
=> '𝈗', 3639 => '𝈘', 3640 => '𝈙', 3641 => '𝈚', 3642 => '𝈛', 3643 => '𝈜', 3644 => '𝈝', 3645 => '𝈞', 3646 => '𝈟', 3647 => '𝈠', 3648 => '𝈡', 3649 => '𝈢', 3650 => '𝈣', 3651 => '𝈤', 3652 => '𝈥', 3653 => '𝈦', 3654 => '𝈧', 3655 => '𝈨', 3656 => '𝈩', 3657 => '𝈪', 3658 => '𝈫', 3659 => '𝈬', 3660 => '𝈭', 3661 => '𝈮', 3662 => '𝈯', 3663 => '𝈰', 3664 => '𝈱', 3665 => '𝈲', 3666 => '𝈳', 3667 => '𝈴', 3668 => '𝈵', 3669 => '𝈶', 3670 => '𝈷', 3671 => '𝈸', 3672 => '𝈹', 3673 => '𝈺', 3674 => '𝈻', 3675 => '𝈼', 3676 => '𝈽', 3677 => '𝈾', 3678 => '𝈿', 3679 => '𝉀', 3680 => '𝉁', 3681 => '𝉅', 3682 => '🀀', 3683 => '🀁', 3684 => '🀂', 3685 => '🀃', 3686 => '🀄', 3687 => '🀅', 3688 => '🀆', 3689 => '🀇', 3690 => '🀈', 3691 => '🀉', 3692 => '🀊', 3693 => '🀋', 3694 => '🀌', 3695 => '🀍', 3696 => '🀎', 3697 => '🀏', 3698 => '🀐', 3699 => '🀑', 3700 => '🀒', 3701 => '🀓', 3702 => '🀔', 3703 => '🀕', 3704 => '🀖', 3705 => '🀗', 3706 => '🀘', 3707 => '🀙', 3708 => '🀚', 3709 => '🀛', 3710 => '🀜', 3711 => '🀝', 3712 => '🀞', 3713 => '🀟', 3714 => '🀠', 3715 => '🀡', 3716 => '🀢', 3717 => '🀣', 3718 => '🀤', 3719 => '🀥', 3720 => '🀦', 3721 => '🀧', 3722 => '🀨', 3723 => '🀩', 3724 => '🀪', 3725 => '🀫', 3726 => '🀰', 3727 => '🀱', 3728 => '🀲', 3729 => '🀳', 3730 => '🀴', 3731 => '🀵', 3732 => '🀶', 3733 => '🀷', 3734 => '🀸', 3735 => '🀹', 3736 => '🀺', 3737 => '🀻', 3738 => '🀼', 3739 => '🀽', 3740 => '🀾', 3741 => '🀿', 3742 => '🁀', 3743 => '🁁', 3744 => '🁂', 3745 => '🁃', 3746 => '🁄', 3747 => '🁅', 3748 => '🁆', 3749 => '🁇', 3750 => '🁈', 3751 => '🁉', 3752 => '🁊', 3753 => '🁋', 3754 => '🁌', 3755 => '🁍', 3756 => '🁎', 3757 => '🁏', 3758 => '🁐', 3759 => '🁑', 3760 => '🁒', 3761 => '🁓', 3762 => '🁔', 3763 => '🁕', 3764 => '🁖', 3765 => '🁗', 3766 => '🁘', 3767 => '🁙', 3768 => '🁚', 3769 => '🁛', 3770 => '🁜', 3771 => '🁝', 3772 => '🁞', 3773 => '🁟', 3774 => '🁠', 3775 => '🁡', 3776 => '🁢', 3777 => '🁣', 3778 => '🁤', 3779 => '🁥', 3780 => '🁦', 3781 => '🁧', 3782 => '🁨', 3783 => '🁩', 3784 => '🁪', 3785 => '🁫', 3786 => '🁬', 3787 => '🁭', 3788 => '🁮', 3789 => '🁯', 3790 => '🁰', 3791 => '🁱', 
3792 => '🁲', 3793 => '🁳', 3794 => '🁴', 3795 => '🁵', 3796 => '🁶', 3797 => '🁷', 3798 => '🁸', 3799 => '🁹', 3800 => '🁺', 3801 => '🁻', 3802 => '🁼', 3803 => '🁽', 3804 => '🁾', 3805 => '🁿', 3806 => '🂀', 3807 => '🂁', 3808 => '🂂', 3809 => '🂃', 3810 => '🂄', 3811 => '🂅', 3812 => '🂆', 3813 => '🂇', 3814 => '🂈', 3815 => '🂉', 3816 => '🂊', 3817 => '🂋', 3818 => '🂌', 3819 => '🂍', 3820 => '🂎', 3821 => '🂏', 3822 => '🂐', 3823 => '🂑', 3824 => '🂒', 3825 => '🂓', 3826 => '🂠', 3827 => '🂡', 3828 => '🂢', 3829 => '🂣', 3830 => '🂤', 3831 => '🂥', 3832 => '🂦', 3833 => '🂧', 3834 => '🂨', 3835 => '🂩', 3836 => '🂪', 3837 => '🂫', 3838 => '🂬', 3839 => '🂭', 3840 => '🂮', 3841 => '🂱', 3842 => '🂲', 3843 => '🂳', 3844 => '🂴', 3845 => '🂵', 3846 => '🂶', 3847 => '🂷', 3848 => '🂸', 3849 => '🂹', 3850 => '🂺', 3851 => '🂻', 3852 => '🂼', 3853 => '🂽', 3854 => '🂾', 3855 => '🃁', 3856 => '🃂', 3857 => '🃃', 3858 => '🃄', 3859 => '🃅', 3860 => '🃆', 3861 => '🃇', 3862 => '🃈', 3863 => '🃉', 3864 => '🃊', 3865 => '🃋', 3866 => '🃌', 3867 => '🃍', 3868 => '🃎', 3869 => '🃏', 3870 => '🃑', 3871 => '🃒', 3872 => '🃓', 3873 => '🃔', 3874 => '🃕', 3875 => '🃖', 3876 => '🃗', 3877 => '🃘', 3878 => '🃙', 3879 => '🃚', 3880 => '🃛', 3881 => '🃜', 3882 => '🃝', 3883 => '🃞', 3884 => '🃟', 3885 => '🌀', 3886 => '🌁', 3887 => '🌂', 3888 => '🌃', 3889 => '🌄', 3890 => '🌅', 3891 => '🌆', 3892 => '🌇', 3893 => '🌈', 3894 => '🌉', 3895 => '🌊', 3896 => '🌋', 3897 => '🌌', 3898 => '🌍', 3899 => '🌎', 3900 => '🌏', 3901 => '🌐', 3902 => '🌑', 3903 => '🌒', 3904 => '🌓', 3905 => '🌔', 3906 => '🌕', 3907 => '🌖', 3908 => '🌗', 3909 => '🌘', 3910 => '🌙', 3911 => '🌚', 3912 => '🌛', 3913 => '🌜', 3914 => '🌝', 3915 => '🌞', 3916 => '🌟', 3917 => '🌠', 3918 => '🌰', 3919 => '🌱', 3920 => '🌲', 3921 => '🌳', 3922 => '🌴', 3923 => '🌵', 3924 => '🌷', 3925 => '🌸', 3926 => '🌹', 3927 => '🌺', 3928 => '🌻', 3929 => '🌼', 3930 => '🌽', 3931 => '🌾', 3932 => '🌿', 3933 => '🍀', 3934 => '🍁', 3935 => '🍂', 3936 => '🍃', 3937 => '🍄', 3938 => '🍅', 3939 => '🍆', 3940 => '🍇', 3941 => '🍈', 3942 => '🍉', 3943 => '🍊', 3944 => '🍋', 3945 => 
'🍌', 3946 => '🍍', 3947 => '🍎', 3948 => '🍏', 3949 => '🍐', 3950 => '🍑', 3951 => '🍒', 3952 => '🍓', 3953 => '🍔', 3954 => '🍕', 3955 => '🍖', 3956 => '🍗', 3957 => '🍘', 3958 => '🍙', 3959 => '🍚', 3960 => '🍛', 3961 => '🍜', 3962 => '🍝', 3963 => '🍞', 3964 => '🍟', 3965 => '🍠', 3966 => '🍡', 3967 => '🍢', 3968 => '🍣', 3969 => '🍤', 3970 => '🍥', 3971 => '🍦', 3972 => '🍧', 3973 => '🍨', 3974 => '🍩', 3975 => '🍪', 3976 => '🍫', 3977 => '🍬', 3978 => '🍭', 3979 => '🍮', 3980 => '🍯', 3981 => '🍰', 3982 => '🍱', 3983 => '🍲', 3984 => '🍳', 3985 => '🍴', 3986 => '🍵', 3987 => '🍶', 3988 => '🍷', 3989 => '🍸', 3990 => '🍹', 3991 => '🍺', 3992 => '🍻', 3993 => '🍼', 3994 => '🎀', 3995 => '🎁', 3996 => '🎂', 3997 => '🎃', 3998 => '🎄', 3999 => '🎅', 4000 => '🎆', 4001 => '🎇', 4002 => '🎈', 4003 => '🎉', 4004 => '🎊', 4005 => '🎋', 4006 => '🎌', 4007 => '🎍', 4008 => '🎎', 4009 => '🎏', 4010 => '🎐', 4011 => '🎑', 4012 => '🎒', 4013 => '🎓', 4014 => '🎠', 4015 => '🎡', 4016 => '🎢', 4017 => '🎣', 4018 => '🎤', 4019 => '🎥', 4020 => '🎦', 4021 => '🎧', 4022 => '🎨', 4023 => '🎩', 4024 => '🎪', 4025 => '🎫', 4026 => '🎬', 4027 => '🎭', 4028 => '🎮', 4029 => '🎯', 4030 => '🎰', 4031 => '🎱', 4032 => '🎲', 4033 => '🎳', 4034 => '🎴', 4035 => '🎵', 4036 => '🎶', 4037 => '🎷', 4038 => '🎸', 4039 => '🎹', 4040 => '🎺', 4041 => '🎻', 4042 => '🎼', 4043 => '🎽', 4044 => '🎾', 4045 => '🎿', 4046 => '🏀', 40