MediaWiki  master
Preprocessor_DOM.php
Go to the documentation of this file.
1 <?php
28 // phpcs:ignore Squiz.Classes.ValidClassName.NotCamelCaps
30 
/**
 * Parser instance handed to the constructor; preprocessToObj() and
 * preprocessToXml() read its strip list, options and node counter.
 */
34  public $parser;
35 
/**
 * Memory limit in bytes derived from the memory_limit ini setting in the
 * constructor, or false when no usable limit was found. Read by memCheck().
 */
36  public $memoryLimit;
37 
/**
 * NOTE(review): not referenced in the visible code; presumably the key prefix
 * used by cacheGetTree()/cacheSetTree() — confirm against the parent class.
 */
38  const CACHE_PREFIX = 'preprocess-xml';
39 
/**
 * Store the parser and derive a byte-valued memory limit from php.ini.
 *
 * The memory_limit setting may be a plain byte count or use PHP's shorthand
 * suffixes K, M or G (case-insensitive). An empty value, -1 (unlimited) or
 * anything unparsable leaves $this->memoryLimit at false, which makes
 * memCheck() a no-op.
 *
 * @param Parser $parser Parser this preprocessor works for
 */
public function __construct( $parser ) {
	wfDeprecated( __METHOD__, '1.34' ); // T204945
	$this->parser = $parser;
	$this->memoryLimit = false;

	// ini_get() returns false for an unknown option; strval() turns that into ''
	$mem = trim( strval( ini_get( 'memory_limit' ) ) );
	if ( $mem === '' || $mem === '-1' ) {
		return;
	}

	if ( preg_match( '/^(\d+)([KMG]?)$/i', $mem, $m ) ) {
		$multipliers = [
			'' => 1,
			'K' => 1024,
			'M' => 1048576,
			'G' => 1073741824,
		];
		// Multiplication (not a shift) so that an overflow degrades to float
		$this->memoryLimit = (int)$m[1] * $multipliers[strtoupper( $m[2] )];
	}
}
56 
/**
 * Create a new expansion frame bound to this preprocessor.
 *
 * @return PPFrame_DOM
 */
public function newFrame() {
	$frame = new PPFrame_DOM( $this );
	return $frame;
}
63 
/**
 * Create a new expansion frame that carries caller-supplied arguments.
 *
 * @param mixed $args Passed unchanged as the second PPCustomFrame_DOM
 *   constructor argument
 * @return PPCustomFrame_DOM
 */
public function newCustomFrame( $args ) {
	$frame = new PPCustomFrame_DOM( $this, $args );
	return $frame;
}
71 
/**
 * Turn an array of values into a PPNode_DOM wrapping a list of <part> nodes.
 *
 * Integer keys become <name index="N"/>; every other key becomes a named
 * <name>key</name>= part. Keys and values are escaped with htmlspecialchars().
 *
 * @param array $values Map of argument key => argument text
 * @return PPNode_DOM Node wrapping the child nodes of the generated <list>
 * @throws MWException If the generated markup cannot be parsed as XML even
 *   after UTF-8 clean-up
 */
public function newPartNodeArray( $values ) {
	// The list is assembled as a string and parsed in one go instead of
	// being built node by node through the DOM API.
	$parts = [];
	foreach ( $values as $key => $value ) {
		if ( !is_int( $key ) ) {
			$parts[] = "<part><name>" . htmlspecialchars( $key )
				. "</name>=<value>" . htmlspecialchars( $value ) . "</value></part>";
			continue;
		}
		$parts[] = "<part><name index=\"$key\"/><value>"
			. htmlspecialchars( $value ) . "</value></part>";
	}
	$markup = "<list>" . implode( '', $parts ) . "</list>";

	$doc = new DOMDocument();
	Wikimedia\suppressWarnings();
	$loaded = $doc->loadXML( $markup );
	Wikimedia\restoreWarnings();

	if ( !$loaded ) {
		// Second attempt: strip invalid characters via UtfNormal, and pass
		// 1 << 19 (XML_PARSE_HUGE) so that newer libxml2 versions accept
		// documents nested more than 256 levels deep.
		$markup = UtfNormal\Validator::cleanUp( $markup );
		$loaded = $doc->loadXML( $markup, 1 << 19 );
		if ( !$loaded ) {
			throw new MWException( 'Parameters passed to ' . __METHOD__ . ' result in invalid XML' );
		}
	}

	return new PPNode_DOM( $doc->documentElement->childNodes );
}
113 
/**
 * Compare current memory usage against the limit found by the constructor.
 *
 * @return bool True when no limit is known or usage is at most 80% of the
 *   limit; false when usage is above 80% but not above 90%
 * @throws MWException When usage exceeds 90% of the limit
 */
public function memCheck() {
	if ( $this->memoryLimit !== false ) {
		$used = memory_get_usage();
		$hardCap = $this->memoryLimit * 0.9;
		if ( $used > $hardCap ) {
			// Report the 90% mark rounded to the nearest megabyte
			$limit = intval( $hardCap / 1048576 + 0.5 );
			throw new MWException( "Preprocessor hit 90% memory limit ($limit MB)" );
		}
		return $used <= $this->memoryLimit * 0.8;
	}

	return true;
}
129 
/**
 * Preprocess wikitext and return the resulting tree as a PPNode_DOM.
 *
 * The XML form is taken from cacheGetTree() when available; otherwise it is
 * produced by preprocessToXml() and handed to cacheSetTree(). The number of
 * '<' characters in the XML is added to the parser's generated-node counter
 * before any DOM is built, so oversized input fails early.
 *
 * @param string $text Wikitext to preprocess
 * @param int $flags Bit field forwarded to the cache helpers and
 *   preprocessToXml()
 * @return PPNode_DOM Node wrapping the document element of the parsed XML
 * @throws MWException If the generated node count limit is exceeded or the
 *   XML cannot be parsed
 */
public function preprocessToObj( $text, $flags = 0 ) {
	$tree = $this->cacheGetTree( $text, $flags );
	if ( $tree === false ) {
		$tree = $this->preprocessToXml( $text, $flags );
		$this->cacheSetTree( $text, $flags, $tree );
	}

	// Enforce the node limit on the raw XML; do not attempt to build the DOM
	// when it is exceeded.
	$parser = $this->parser;
	$parser->mGeneratedPPNodeCount += substr_count( $tree, '<' );
	if ( $parser->mGeneratedPPNodeCount > $parser->mOptions->getMaxGeneratedPPNodeCount() ) {
		throw new MWException( __METHOD__ . ': generated node count limit exceeded' );
	}

	$doc = new DOMDocument;
	Wikimedia\suppressWarnings();
	$loaded = $doc->loadXML( $tree );
	Wikimedia\restoreWarnings();

	if ( !$loaded ) {
		// Retry after removing invalid characters with UtfNormal. The option
		// 1 << 19 is XML_PARSE_HUGE, which newer libxml2 versions need for
		// documents nested more than 256 levels deep.
		$tree = UtfNormal\Validator::cleanUp( $tree );
		$loaded = $doc->loadXML( $tree, 1 << 19 );
	}

	if ( !$loaded ) {
		throw new MWException( __METHOD__ . ' generated invalid XML' );
	}

	return new PPNode_DOM( $doc->documentElement );
}
192 
/**
 * Convert wikitext into the XML string that preprocessToObj() parses.
 *
 * The text is scanned left to right in a single loop. Literal runs are
 * escaped with htmlspecialchars() and appended to the current accumulator;
 * special constructs are turned into elements:
 *  - <ignore>: inclusion-control tags and text that does not apply in the
 *    current mode (selected by Parser::PTD_FOR_INCLUSION in $flags)
 *  - <comment>: HTML comments
 *  - <ext>: extension-style tags, with <name>, <attr>, optional <inner> and
 *    optional <close> children
 *  - <h level="N" i="M">: headings
 *  - elements named by the bracket rules in $this->rules, with <title> and
 *    <part> children
 * The result always starts with <root> and ends with </root>.
 *
 * NOTE(review): $this->rules is not defined in the visible part of this
 * class; presumably inherited from the parent preprocessor class.
 *
 * @param string $text Wikitext to preprocess
 * @param int $flags Bit field; Parser::PTD_FOR_INCLUSION selects handling for
 *   transcluded text
 * @return string XML document
 */
public function preprocessToXml( $text, $flags = 0 ) {
	// Without this declaration the variable read below is an undefined local,
	// which would always enable the '-' search character and ignore the
	// configured setting.
	global $wgDisableLangConversion;

	$forInclusion = $flags & Parser::PTD_FOR_INCLUSION;

	$xmlishElements = $this->parser->getStripList();
	$xmlishAllowMissingEndTag = [ 'includeonly', 'noinclude', 'onlyinclude' ];
	$enableOnlyinclude = false;
	if ( $forInclusion ) {
		$ignoredTags = [ 'includeonly', '/includeonly' ];
		$ignoredElements = [ 'noinclude' ];
		$xmlishElements[] = 'noinclude';
		if ( strpos( $text, '<onlyinclude>' ) !== false
			&& strpos( $text, '</onlyinclude>' ) !== false
		) {
			$enableOnlyinclude = true;
		}
	} else {
		$ignoredTags = [ 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude' ];
		$ignoredElements = [ 'includeonly' ];
		$xmlishElements[] = 'includeonly';
	}
	$xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );

	// Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
	$elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";

	$stack = new PPDStack;

	$searchBase = "[{<\n"; # }
	if ( !$wgDisableLangConversion ) {
		$searchBase .= '-';
	}

	// For fast reverse searches
	$revText = strrev( $text );
	$lengthText = strlen( $text );

	// Input pointer, starts out pointing to a pseudo-newline before the start
	$i = 0;
	// Current accumulator
	$accum =& $stack->getAccum();
	$accum = '<root>';
	// True to find equals signs in arguments
	$findEquals = false;
	// True to take notice of pipe characters
	$findPipe = false;
	$headingIndex = 1;
	// True if $i is inside a possible heading
	$inHeading = false;
	// True if there are no more greater-than (>) signs right of $i
	$noMoreGT = false;
	// Map of tag name => true if there are no more closing tags of given type right of $i
	$noMoreClosingTag = [];
	// True to ignore all input up to the next <onlyinclude>
	$findOnlyinclude = $enableOnlyinclude;
	// Do a line-start run without outputting an LF character
	$fakeLineStart = true;

	while ( true ) {
		// $this->memCheck();

		if ( $findOnlyinclude ) {
			// Ignore all input up to the next <onlyinclude>
			$startPos = strpos( $text, '<onlyinclude>', $i );
			if ( $startPos === false ) {
				// Ignored section runs to the end
				$accum .= '<ignore>' . htmlspecialchars( substr( $text, $i ) ) . '</ignore>';
				break;
			}
			$tagEndPos = $startPos + strlen( '<onlyinclude>' ); // past-the-end
			$accum .= '<ignore>' . htmlspecialchars( substr( $text, $i, $tagEndPos - $i ) ) . '</ignore>';
			$i = $tagEndPos;
			$findOnlyinclude = false;
		}

		if ( $fakeLineStart ) {
			$found = 'line-start';
			$curChar = '';
		} else {
			# Find next opening brace, closing brace or pipe
			$search = $searchBase;
			if ( $stack->top === false ) {
				$currentClosing = '';
			} else {
				$currentClosing = $stack->top->close;
				$search .= $currentClosing;
			}
			if ( $findPipe ) {
				$search .= '|';
			}
			if ( $findEquals ) {
				// First equals will be for the template
				$search .= '=';
			}
			$rule = null;
			# Output literal section, advance input counter
			$literalLength = strcspn( $text, $search, $i );
			if ( $literalLength > 0 ) {
				$accum .= htmlspecialchars( substr( $text, $i, $literalLength ) );
				$i += $literalLength;
			}
			if ( $i >= $lengthText ) {
				if ( $currentClosing == "\n" ) {
					// Do a past-the-end run to finish off the heading
					$curChar = '';
					$found = 'line-end';
				} else {
					# All done
					break;
				}
			} else {
				$curChar = $curTwoChar = $text[$i];
				if ( ( $i + 1 ) < $lengthText ) {
					$curTwoChar .= $text[$i + 1];
				}
				if ( $curChar == '|' ) {
					$found = 'pipe';
				} elseif ( $curChar == '=' ) {
					$found = 'equals';
				} elseif ( $curChar == '<' ) {
					$found = 'angle';
				} elseif ( $curChar == "\n" ) {
					if ( $inHeading ) {
						$found = 'line-end';
					} else {
						$found = 'line-start';
					}
				} elseif ( $curTwoChar == $currentClosing ) {
					$found = 'close';
					$curChar = $curTwoChar;
				} elseif ( $curChar == $currentClosing ) {
					$found = 'close';
				} elseif ( isset( $this->rules[$curTwoChar] ) ) {
					$curChar = $curTwoChar;
					$found = 'open';
					$rule = $this->rules[$curChar];
				} elseif ( isset( $this->rules[$curChar] ) ) {
					$found = 'open';
					$rule = $this->rules[$curChar];
				} else {
					# Some versions of PHP have a strcspn which stops on
					# null characters; ignore these and continue.
					# We also may get '-' and '}' characters here which
					# don't match -{ or $currentClosing. Add these to
					# output and continue.
					if ( $curChar == '-' || $curChar == '}' ) {
						$accum .= $curChar;
					}
					++$i;
					continue;
				}
			}
		}

		if ( $found == 'angle' ) {
			$matches = false;
			// Handle </onlyinclude>
			if ( $enableOnlyinclude
				&& substr( $text, $i, strlen( '</onlyinclude>' ) ) == '</onlyinclude>'
			) {
				$findOnlyinclude = true;
				continue;
			}

			// Determine element name
			if ( !preg_match( $elementsRegex, $text, $matches, 0, $i + 1 ) ) {
				// Element name missing or not listed
				$accum .= '&lt;';
				++$i;
				continue;
			}
			// Handle comments
			if ( isset( $matches[2] ) && $matches[2] == '!--' ) {
				// To avoid leaving blank lines, when a sequence of
				// space-separated comments is both preceded and followed by
				// a newline (ignoring spaces), then
				// trim leading and trailing spaces and the trailing newline.

				// Find the end
				$endPos = strpos( $text, '-->', $i + 4 );
				if ( $endPos === false ) {
					// Unclosed comment in input, runs to end
					$inner = substr( $text, $i );
					$accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
					$i = $lengthText;
				} else {
					// Search backwards for leading whitespace
					$wsStart = $i ? ( $i - strspn( $revText, " \t", $lengthText - $i ) ) : 0;

					// Search forwards for trailing whitespace
					// $wsEnd will be the position of the last space (or the '>' if there's none)
					$wsEnd = $endPos + 2 + strspn( $text, " \t", $endPos + 3 );

					// Keep looking forward as long as we're finding more
					// comments.
					$comments = [ [ $wsStart, $wsEnd ] ];
					while ( substr( $text, $wsEnd + 1, 4 ) == '<!--' ) {
						$c = strpos( $text, '-->', $wsEnd + 4 );
						if ( $c === false ) {
							break;
						}
						$c = $c + 2 + strspn( $text, " \t", $c + 3 );
						$comments[] = [ $wsEnd + 1, $c ];
						$wsEnd = $c;
					}

					// Eat the line if possible
					// TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at
					// the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but
					// it's a possible beneficial b/c break.
					if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) == "\n"
						&& substr( $text, $wsEnd + 1, 1 ) == "\n"
					) {
						// Remove leading whitespace from the end of the accumulator
						// Sanity check first though
						$wsLength = $i - $wsStart;
						if ( $wsLength > 0
							&& strspn( $accum, " \t", -$wsLength ) === $wsLength
						) {
							$accum = substr( $accum, 0, -$wsLength );
						}

						// Dump all but the last comment to the accumulator
						foreach ( $comments as $j => $com ) {
							$startPos = $com[0];
							$endPos = $com[1] + 1;
							if ( $j == ( count( $comments ) - 1 ) ) {
								break;
							}
							$inner = substr( $text, $startPos, $endPos - $startPos );
							$accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
						}

						// Do a line-start run next time to look for headings after the comment
						$fakeLineStart = true;
					} else {
						// No line to eat, just take the comment itself
						$startPos = $i;
						$endPos += 2;
					}

					if ( $stack->top ) {
						$part = $stack->top->getCurrentPart();
						if ( !( isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 ) ) {
							$part->visualEnd = $wsStart;
						}
						// Else comments abutting, no change in visual end
						$part->commentEnd = $endPos;
					}
					$i = $endPos + 1;
					$inner = substr( $text, $startPos, $endPos - $startPos + 1 );
					$accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
				}
				continue;
			}
			$name = $matches[1];
			$lowerName = strtolower( $name );
			$attrStart = $i + strlen( $name ) + 1;

			// Find end of tag
			$tagEndPos = $noMoreGT ? false : strpos( $text, '>', $attrStart );
			if ( $tagEndPos === false ) {
				// Infinite backtrack
				// Disable tag search to prevent worst-case O(N^2) performance
				$noMoreGT = true;
				$accum .= '&lt;';
				++$i;
				continue;
			}

			// Handle ignored tags
			if ( in_array( $lowerName, $ignoredTags ) ) {
				$accum .= '<ignore>'
					. htmlspecialchars( substr( $text, $i, $tagEndPos - $i + 1 ) )
					. '</ignore>';
				$i = $tagEndPos + 1;
				continue;
			}

			$tagStartPos = $i;
			if ( $text[$tagEndPos - 1] == '/' ) {
				// Self-closing tag: no inner text and no closing tag
				$attrEnd = $tagEndPos - 1;
				$inner = null;
				$i = $tagEndPos + 1;
				$close = '';
			} else {
				$attrEnd = $tagEndPos;
				// Find closing tag
				if (
					!isset( $noMoreClosingTag[$name] ) &&
					preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
						$text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 )
				) {
					$inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 );
					$i = $matches[0][1] + strlen( $matches[0][0] );
					$close = '<close>' . htmlspecialchars( $matches[0][0] ) . '</close>';
				} else {
					// No end tag
					if ( in_array( $name, $xmlishAllowMissingEndTag ) ) {
						// Let it run out to the end of the text.
						$inner = substr( $text, $tagEndPos + 1 );
						$i = $lengthText;
						$close = '';
					} else {
						// Don't match the tag, treat opening tag as literal and resume parsing.
						$i = $tagEndPos + 1;
						$accum .= htmlspecialchars( substr( $text, $tagStartPos, $tagEndPos + 1 - $tagStartPos ) );
						// Cache results, otherwise we have O(N^2) performance for input like <foo><foo><foo>...
						$noMoreClosingTag[$name] = true;
						continue;
					}
				}
			}
			// <includeonly> and <noinclude> just become <ignore> tags
			if ( in_array( $lowerName, $ignoredElements ) ) {
				$accum .= '<ignore>' . htmlspecialchars( substr( $text, $tagStartPos, $i - $tagStartPos ) )
					. '</ignore>';
				continue;
			}

			$accum .= '<ext>';
			if ( $attrEnd <= $attrStart ) {
				$attr = '';
			} else {
				$attr = substr( $text, $attrStart, $attrEnd - $attrStart );
			}
			$accum .= '<name>' . htmlspecialchars( $name ) . '</name>' .
				// Note that the attr element contains the whitespace between name and attribute,
				// this is necessary for precise reconstruction during pre-save transform.
				'<attr>' . htmlspecialchars( $attr ) . '</attr>';
			if ( $inner !== null ) {
				$accum .= '<inner>' . htmlspecialchars( $inner ) . '</inner>';
			}
			$accum .= $close . '</ext>';
		} elseif ( $found == 'line-start' ) {
			// Is this the start of a heading?
			// Line break belongs before the heading element in any case
			if ( $fakeLineStart ) {
				$fakeLineStart = false;
			} else {
				$accum .= $curChar;
				$i++;
			}

			$count = strspn( $text, '=', $i, 6 );
			if ( $count == 1 && $findEquals ) {
				// DWIM: This looks kind of like a name/value separator.
				// Let's let the equals handler have it and break the
				// potential heading. This is heuristic, but AFAICT the
				// methods for completely correct disambiguation are very
				// complex.
			} elseif ( $count > 0 ) {
				$piece = [
					'open' => "\n",
					'close' => "\n",
					'parts' => [ new PPDPart( str_repeat( '=', $count ) ) ],
					'startPos' => $i,
					'count' => $count ];
				$stack->push( $piece );
				$accum =& $stack->getAccum();
				$stackFlags = $stack->getFlags();
				if ( isset( $stackFlags['findEquals'] ) ) {
					$findEquals = $stackFlags['findEquals'];
				}
				if ( isset( $stackFlags['findPipe'] ) ) {
					$findPipe = $stackFlags['findPipe'];
				}
				if ( isset( $stackFlags['inHeading'] ) ) {
					$inHeading = $stackFlags['inHeading'];
				}
				$i += $count;
			}
		} elseif ( $found == 'line-end' ) {
			$piece = $stack->top;
			// A heading must be open, otherwise \n wouldn't have been in the search list
			// FIXME: Don't use assert()
			// phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.assert
			assert( $piece->open === "\n" );
			$part = $piece->getCurrentPart();
			// Search back through the input to see if it has a proper close.
			// Do this using the reversed string since the other solutions
			// (end anchor, etc.) are inefficient.
			$wsLength = strspn( $revText, " \t", $lengthText - $i );
			$searchStart = $i - $wsLength;
			if ( isset( $part->commentEnd ) && $searchStart - 1 == $part->commentEnd ) {
				// Comment found at line end
				// Search for equals signs before the comment
				$searchStart = $part->visualEnd;
				$searchStart -= strspn( $revText, " \t", $lengthText - $searchStart );
			}
			$count = $piece->count;
			$equalsLength = strspn( $revText, '=', $lengthText - $searchStart );
			if ( $equalsLength > 0 ) {
				if ( $searchStart - $equalsLength == $piece->startPos ) {
					// This is just a single string of equals signs on its own line
					// Replicate the doHeadings behavior /={count}(.+)={count}/
					// First find out how many equals signs there really are (don't stop at 6)
					$count = $equalsLength;
					if ( $count < 3 ) {
						$count = 0;
					} else {
						$count = min( 6, intval( ( $count - 1 ) / 2 ) );
					}
				} else {
					$count = min( $equalsLength, $count );
				}
				if ( $count > 0 ) {
					// Normal match, output <h>
					$element = "<h level=\"$count\" i=\"$headingIndex\">$accum</h>";
					$headingIndex++;
				} else {
					// Single equals sign on its own line, count=0
					$element = $accum;
				}
			} else {
				// No match, no <h>, just pass down the inner text
				$element = $accum;
			}
			// Unwind the stack
			$stack->pop();
			$accum =& $stack->getAccum();
			$stackFlags = $stack->getFlags();
			if ( isset( $stackFlags['findEquals'] ) ) {
				$findEquals = $stackFlags['findEquals'];
			}
			if ( isset( $stackFlags['findPipe'] ) ) {
				$findPipe = $stackFlags['findPipe'];
			}
			if ( isset( $stackFlags['inHeading'] ) ) {
				$inHeading = $stackFlags['inHeading'];
			}

			// Append the result to the enclosing accumulator
			$accum .= $element;
			// Note that we do NOT increment the input pointer.
			// This is because the closing linebreak could be the opening linebreak of
			// another heading. Infinite loops are avoided because the next iteration MUST
			// hit the heading open case above, which unconditionally increments the
			// input pointer.
		} elseif ( $found == 'open' ) {
			# count opening brace characters
			$curLen = strlen( $curChar );
			$count = ( $curLen > 1 ) ?
				# allow the final character to repeat
				strspn( $text, $curChar[$curLen - 1], $i + 1 ) + 1 :
				strspn( $text, $curChar, $i );

			$savedPrefix = '';
			$lineStart = ( $i > 0 && $text[$i - 1] == "\n" );

			if ( $curChar === "-{" && $count > $curLen ) {
				// -{ => {{ transition because rightmost wins
				$savedPrefix = '-';
				$i++;
				$curChar = '{';
				$count--;
				$rule = $this->rules[$curChar];
			}

			# we need to add to stack only if opening brace count is enough for one of the rules
			if ( $count >= $rule['min'] ) {
				# Add it to the stack
				$piece = [
					'open' => $curChar,
					'close' => $rule['end'],
					'savedPrefix' => $savedPrefix,
					'count' => $count,
					'lineStart' => $lineStart,
				];

				$stack->push( $piece );
				$accum =& $stack->getAccum();
				$stackFlags = $stack->getFlags();
				if ( isset( $stackFlags['findEquals'] ) ) {
					$findEquals = $stackFlags['findEquals'];
				}
				if ( isset( $stackFlags['findPipe'] ) ) {
					$findPipe = $stackFlags['findPipe'];
				}
				if ( isset( $stackFlags['inHeading'] ) ) {
					$inHeading = $stackFlags['inHeading'];
				}
			} else {
				# Add literal brace(s)
				$accum .= htmlspecialchars( $savedPrefix . str_repeat( $curChar, $count ) );
			}
			$i += $count;
		} elseif ( $found == 'close' ) {
			$piece = $stack->top;
			# lets check if there are enough characters for closing brace
			$maxCount = $piece->count;
			if ( $piece->close === '}-' && $curChar === '}' ) {
				$maxCount--; # don't try to match closing '-' as a '}'
			}
			$curLen = strlen( $curChar );
			$count = ( $curLen > 1 ) ? $curLen :
				strspn( $text, $curChar, $i, $maxCount );

			# check for maximum matching characters (if there are 5 closing
			# characters, we will probably need only 3 - depending on the rules)
			$rule = $this->rules[$piece->open];
			if ( $count > $rule['max'] ) {
				# The specified maximum exists in the callback array, unless the caller
				# has made an error
				$matchingCount = $rule['max'];
			} else {
				# Count is less than the maximum
				# Skip any gaps in the callback array to find the true largest match
				# Need to use array_key_exists not isset because the callback can be null
				$matchingCount = $count;
				while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $rule['names'] ) ) {
					--$matchingCount;
				}
			}

			if ( $matchingCount <= 0 ) {
				# No matching element found in callback array
				# Output a literal closing brace and continue
				$endText = substr( $text, $i, $count );
				$accum .= htmlspecialchars( $endText );
				$i += $count;
				continue;
			}
			$name = $rule['names'][$matchingCount];
			if ( $name === null ) {
				// No element, just literal text
				$endText = substr( $text, $i, $matchingCount );
				$element = $piece->breakSyntax( $matchingCount ) . $endText;
			} else {
				# Create XML element
				# Note: $parts is already XML, does not need to be encoded further
				$parts = $piece->parts;
				$title = $parts[0]->out;
				unset( $parts[0] );

				# The invocation is at the start of the line if lineStart is set in
				# the stack, and all opening brackets are used up.
				if ( $maxCount == $matchingCount &&
					!empty( $piece->lineStart ) &&
					strlen( $piece->savedPrefix ) == 0 ) {
					$attr = ' lineStart="1"';
				} else {
					$attr = '';
				}

				$element = "<$name$attr>";
				$element .= "<title>$title</title>";
				$argIndex = 1;
				foreach ( $parts as $part ) {
					if ( isset( $part->eqpos ) ) {
						$argName = substr( $part->out, 0, $part->eqpos );
						$argValue = substr( $part->out, $part->eqpos + 1 );
						$element .= "<part><name>$argName</name>=<value>$argValue</value></part>";
					} else {
						$element .= "<part><name index=\"$argIndex\" /><value>{$part->out}</value></part>";
						$argIndex++;
					}
				}
				$element .= "</$name>";
			}

			# Advance input pointer
			$i += $matchingCount;

			# Unwind the stack
			$stack->pop();
			$accum =& $stack->getAccum();

			# Re-add the old stack element if it still has unmatched opening characters remaining
			if ( $matchingCount < $piece->count ) {
				$piece->parts = [ new PPDPart ];
				$piece->count -= $matchingCount;
				# do we still qualify for any callback with remaining count?
				$min = $this->rules[$piece->open]['min'];
				if ( $piece->count >= $min ) {
					$stack->push( $piece );
					$accum =& $stack->getAccum();
				} elseif ( $piece->count == 1 && $piece->open === '{' && $piece->savedPrefix === '-' ) {
					$piece->savedPrefix = '';
					$piece->open = '-{';
					$piece->count = 2;
					$piece->close = $this->rules[$piece->open]['end'];
					$stack->push( $piece );
					$accum =& $stack->getAccum();
				} else {
					$s = substr( $piece->open, 0, -1 );
					$s .= str_repeat(
						substr( $piece->open, -1 ),
						$piece->count - strlen( $s )
					);
					$accum .= $piece->savedPrefix . $s;
				}
			} elseif ( $piece->savedPrefix !== '' ) {
				$accum .= $piece->savedPrefix;
			}

			$stackFlags = $stack->getFlags();
			if ( isset( $stackFlags['findEquals'] ) ) {
				$findEquals = $stackFlags['findEquals'];
			}
			if ( isset( $stackFlags['findPipe'] ) ) {
				$findPipe = $stackFlags['findPipe'];
			}
			if ( isset( $stackFlags['inHeading'] ) ) {
				$inHeading = $stackFlags['inHeading'];
			}

			# Add XML element to the enclosing accumulator
			$accum .= $element;
		} elseif ( $found == 'pipe' ) {
			$findEquals = true; // shortcut for getFlags()
			$stack->addPart();
			$accum =& $stack->getAccum();
			++$i;
		} elseif ( $found == 'equals' ) {
			$findEquals = false; // shortcut for getFlags()
			$stack->getCurrentPart()->eqpos = strlen( $accum );
			$accum .= '=';
			++$i;
		}
	}

	# Output any remaining unclosed brackets
	foreach ( $stack->stack as $piece ) {
		$stack->rootAccum .= $piece->breakSyntax();
	}
	$stack->rootAccum .= '</root>';
	$xml = $stack->rootAccum;

	return $xml;
}
830 }
cacheGetTree( $text, $flags)
Attempt to load a precomputed document tree for some given wikitext from the cache.
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such and we might be restricted by PHP settings such as safe mode or open_basedir We cannot assume that the software even has read access anywhere useful Many shared hosts run all users web applications under the same so they can t rely on Unix and must forbid reads to even standard directories like tmp lest users read each others files We cannot assume that the user has the ability to install or run any programs not written as web accessible PHP scripts Since anything that works on cheap shared hosting will work if you have shell or root access MediaWiki s design is based around catering to the lowest common denominator Although we support higher end setups as the way many things work by default is tailored toward shared hosting These defaults are unconventional from the point of view of and they certainly aren t ideal for someone who s installing MediaWiki as root
Stack class to help Preprocessor::preprocessToObj()
Definition: PPDStack.php:26
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:187
The index of the header message $result[1]=The index of the body text message $result[2 through n]=Parameters passed to body text message. Please note the header message cannot receive/use parameters. 'ImgAuthModifyHeaders':Executed just before a file is streamed to a user via img_auth.php, allowing headers to be modified beforehand. $title:LinkTarget object & $headers:HTTP headers(name=> value, names are case insensitive). Two headers get special handling:If-Modified-Since(value must be a valid HTTP date) and Range(must be of the form "bytes=(\*-\*)") will be honored when streaming the file. 'ImportHandleLogItemXMLTag':When parsing a XML tag in a log item. Return false to stop further processing of the tag $reader:XMLReader object $logInfo:Array of information 'ImportHandlePageXMLTag':When parsing a XML tag in a page. Return false to stop further processing of the tag $reader:XMLReader object & $pageInfo:Array of information 'ImportHandleRevisionXMLTag':When parsing a XML tag in a page revision. Return false to stop further processing of the tag $reader:XMLReader object $pageInfo:Array of page information $revisionInfo:Array of revision information 'ImportHandleToplevelXMLTag':When parsing a top level XML tag. Return false to stop further processing of the tag $reader:XMLReader object 'ImportHandleUnknownUser':When a user doesn 't exist locally, this hook is called to give extensions an opportunity to auto-create it. If the auto-creation is successful, return false. $name:User name 'ImportHandleUploadXMLTag':When parsing a XML tag in a file upload. Return false to stop further processing of the tag $reader:XMLReader object $revisionInfo:Array of information 'ImportLogInterwikiLink':Hook to change the interwiki link used in log entries and edit summaries for transwiki imports. & $fullInterwikiPrefix:Interwiki prefix, may contain colons. & $pageTitle:String that contains page title. 
'ImportSources':Called when reading from the $wgImportSources configuration variable. Can be used to lazy-load the import sources list. & $importSources:The value of $wgImportSources. Modify as necessary. See the comment in DefaultSettings.php for the detail of how to structure this array. 'InfoAction':When building information to display on the action=info page. $context:IContextSource object & $pageInfo:Array of information 'InitializeArticleMaybeRedirect':MediaWiki check to see if title is a redirect. & $title:Title object for the current page & $request:WebRequest & $ignoreRedirect:boolean to skip redirect check & $target:Title/string of redirect target & $article:Article object 'InternalParseBeforeLinks':during Parser 's internalParse method before links but after nowiki/noinclude/includeonly/onlyinclude and other processings. & $parser:Parser object & $text:string containing partially parsed text & $stripState:Parser 's internal StripState object 'InternalParseBeforeSanitize':during Parser 's internalParse method just before the parser removes unwanted/dangerous HTML tags and after nowiki/noinclude/includeonly/onlyinclude and other processings. Ideal for syntax-extensions after template/parser function execution which respect nowiki and HTML-comments. & $parser:Parser object & $text:string containing partially parsed text & $stripState:Parser 's internal StripState object 'InterwikiLoadPrefix':When resolving if a given prefix is an interwiki or not. Return true without providing an interwiki to continue interwiki search. $prefix:interwiki prefix we are looking for. & $iwData:output array describing the interwiki with keys iw_url, iw_local, iw_trans and optionally iw_api and iw_wikiid. 'InvalidateEmailComplete':Called after a user 's email has been invalidated successfully. $user:user(object) whose email is being invalidated 'IRCLineURL':When constructing the URL to use in an IRC notification. 
Callee may modify $url and $query, URL will be constructed as $url . $query & $url:URL to index.php & $query:Query string $rc:RecentChange object that triggered url generation 'IsFileCacheable':Override the result of Article::isFileCacheable()(if true) & $article:article(object) being checked 'IsTrustedProxy':Override the result of IP::isTrustedProxy() & $ip:IP being check & $result:Change this value to override the result of IP::isTrustedProxy() 'IsUploadAllowedFromUrl':Override the result of UploadFromUrl::isAllowedUrl() $url:URL used to upload from & $allowed:Boolean indicating if uploading is allowed for given URL 'isValidEmailAddr':Override the result of Sanitizer::validateEmail(), for instance to return false if the domain name doesn 't match your organization. $addr:The e-mail address entered by the user & $result:Set this and return false to override the internal checks 'isValidPassword':Override the result of User::isValidPassword() $password:The password entered by the user & $result:Set this and return false to override the internal checks $user:User the password is being validated for 'Language::getMessagesFileName':$code:The language code or the language we 're looking for a messages file for & $file:The messages file path, you can override this to change the location. 'LanguageGetNamespaces':Provide custom ordering for namespaces or remove namespaces. Do not use this hook to add namespaces. Use CanonicalNamespaces for that. & $namespaces:Array of namespaces indexed by their numbers 'LanguageGetTranslatedLanguageNames':Provide translated language names. & $names:array of language code=> language name $code:language of the preferred translations 'LanguageLinks':Manipulate a page 's language links. This is called in various places to allow extensions to define the effective language links for a page. $title:The page 's Title. & $links:Array with elements of the form "language:title" in the order that they will be output. 
& $linkFlags:Associative array mapping prefixed links to arrays of flags. Currently unused, but planned to provide support for marking individual language links in the UI, e.g. for featured articles. 'LanguageSelector':Hook to change the language selector available on a page. $out:The output page. $cssClassName:CSS class name of the language selector. 'LinkBegin':DEPRECATED since 1.28! Use HtmlPageLinkRendererBegin instead. Used when generating internal and interwiki links in Linker::link(), before processing starts. Return false to skip default processing and return $ret. See documentation for Linker::link() for details on the expected meanings of parameters. $skin:the Skin object $target:the Title that the link is pointing to & $html:the contents that the< a > tag should have(raw HTML) $result
Definition: hooks.txt:1970
if( $line===false) $args
Definition: cdb.php:64
Expansion frame with custom arguments.
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not null
Definition: hooks.txt:767
An expansion frame, used as a context to expand the result of preprocessToObj()
Definition: PPFrame_DOM.php:28
const PTD_FOR_INCLUSION
Definition: Parser.php:110
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users. It's targeted particularly at maintainers for Linux since it's been observed that distribution packages of MediaWiki often break. We've consistently had to recommend that users seeking support use official tarballs instead of their distribution's, and this often solves whatever problem the user is having. It would be nice if this could such as
Definition: distributors.txt:9
$wgDisableLangConversion
Whether to enable language variant conversion.
array cacheSetTree( $text, $flags, $tree)
Store a document tree in the cache.
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
preprocessToObj( $text, $flags=0)
Preprocess some wikitext and return the document tree.
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:271
newPartNodeArray( $values)
within a display generated by the Derivative if and wherever such third party notices normally appear The contents of the NOTICE file are for informational purposes only and do not modify the License You may add Your own attribution notices within Derivative Works that You alongside or as an addendum to the NOTICE text from the provided that such additional attribution notices cannot be construed as modifying the License You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for or distribution of Your or for any such Derivative Works as a provided Your and distribution of the Work otherwise complies with the conditions stated in this License Submission of Contributions Unless You explicitly state any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this without any additional terms or conditions Notwithstanding the nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions Trademarks This License does not grant permission to use the trade names
preprocessToXml( $text, $flags=0)
$matches