MediaWiki  master
Preprocessor_DOM.php
Go to the documentation of this file.
1 <?php
28 // phpcs:ignore Squiz.Classes.ValidClassName.NotCamelCaps
30 
34  public $parser;
35 
36  public $memoryLimit;
37 
38  const CACHE_PREFIX = 'preprocess-xml';
39 
/**
 * Set up the preprocessor and work out the PHP memory limit in bytes.
 *
 * $this->memoryLimit is left as false when PHP reports no limit (empty
 * value or -1) or when the configured value cannot be understood; in that
 * case memCheck() performs no checking.
 *
 * @param mixed $parser Stored in $this->parser; the other methods use it
 *  as the owning parser object
 */
public function __construct( $parser ) {
	wfDeprecated( __METHOD__, '1.34' ); // T204945
	$this->parser = $parser;
	$this->memoryLimit = false;

	// ini_get() may return false; strval() turns that into ''.
	$mem = trim( strval( ini_get( 'memory_limit' ) ) );
	if ( $mem === '' || $mem === '-1' ) {
		// No limit configured
		return;
	}

	// PHP's shorthand byte notation allows a K, M or G suffix (case-insensitive).
	// Previously only plain byte counts and the M suffix were recognised, so
	// a limit such as "2G" silently disabled the memory check.
	if ( preg_match( '/^(\d+)([KMG]?)$/i', $mem, $m ) ) {
		$shifts = [ '' => 0, 'K' => 10, 'M' => 20, 'G' => 30 ];
		$this->memoryLimit = (int)$m[1] << $shifts[strtoupper( $m[2] )];
	}
}
53 
/**
 * Create an expansion frame attached to this preprocessor.
 *
 * @return PPFrame_DOM
 */
public function newFrame() {
	$frame = new PPFrame_DOM( $this );

	return $frame;
}
60 
/**
 * Create an expansion frame attached to this preprocessor that carries
 * caller-supplied arguments.
 *
 * @param mixed $args Passed unchanged to the PPCustomFrame_DOM constructor
 * @return PPCustomFrame_DOM
 */
public function newCustomFrame( $args ) {
	$frame = new PPCustomFrame_DOM( $this, $args );

	return $frame;
}
68 
/**
 * Turn an array of values into a PPNode_DOM holding one <part> node per entry.
 *
 * Integer keys produce <name index="N"/>; any other key is escaped and used
 * as the text of the <name> element. Values are always escaped.
 *
 * @param array $values Map of argument key => argument value
 * @return PPNode_DOM Wraps the child nodes of the generated <list> element
 * @throws MWException If the generated XML cannot be parsed, even after cleanup
 */
public function newPartNodeArray( $values ) {
	// The XML is assembled as text and parsed in one go; per the original
	// author's note, this is reportedly faster than manipulating the DOM.
	$pieces = [ "<list>" ];
	foreach ( $values as $key => $value ) {
		if ( is_int( $key ) ) {
			$pieces[] = "<part><name index=\"$key\"/><value>"
				. htmlspecialchars( $value ) . "</value></part>";
			continue;
		}
		$pieces[] = "<part><name>" . htmlspecialchars( $key )
			. "</name>=<value>" . htmlspecialchars( $value ) . "</value></part>";
	}
	$pieces[] = "</list>";
	$xml = implode( '', $pieces );

	$dom = new DOMDocument();
	Wikimedia\suppressWarnings();
	$loaded = $dom->loadXML( $xml );
	Wikimedia\restoreWarnings();

	if ( !$loaded ) {
		// Second attempt: let UtfNormal strip invalid characters first.
		$xml = UtfNormal\Validator::cleanUp( $xml );
		// 1 << 19 == XML_PARSE_HUGE, which newer libxml2 versions need in
		// order to accept XML nested more than 256 levels deep.
		$loaded = $dom->loadXML( $xml, 1 << 19 );

		if ( !$loaded ) {
			throw new MWException( 'Parameters passed to ' . __METHOD__ . ' result in invalid XML' );
		}
	}

	return new PPNode_DOM( $dom->documentElement->childNodes );
}
110 
/**
 * Compare current memory usage against the limit found by the constructor.
 *
 * @return bool True when no limit is known or when usage is at or below
 *  80% of the limit; false when usage is between 80% and 90%
 * @throws MWException When usage is above 90% of the limit
 */
public function memCheck() {
	$limit = $this->memoryLimit;
	if ( $limit === false ) {
		// Nothing to check against
		return true;
	}

	$used = memory_get_usage();
	$hardCap = $limit * 0.9;
	if ( $used > $hardCap ) {
		// Report the cap in whole megabytes, rounded to nearest
		$capMb = intval( $hardCap / 1048576 + 0.5 );
		throw new MWException( "Preprocessor hit 90% memory limit ($capMb MB)" );
	}

	return $used <= $limit * 0.8;
}
126 
/**
 * Preprocess wikitext and return the result as a DOM-backed node.
 *
 * The XML form is taken from cacheGetTree() when available; otherwise it
 * is produced by preprocessToXml() and handed to cacheSetTree(). The XML
 * is then parsed into a DOMDocument.
 *
 * @param string $text Wikitext to preprocess
 * @param int $flags Passed through to the cache helpers and preprocessToXml()
 * @return PPNode_DOM Wraps the document element of the parsed XML
 * @throws MWException If the generated node count limit is exceeded, or
 *  if the XML cannot be parsed even after cleanup
 */
public function preprocessToObj( $text, $flags = 0 ) {
	$xml = $this->cacheGetTree( $text, $flags );
	if ( $xml === false ) {
		$xml = $this->preprocessToXml( $text, $flags );
		$this->cacheSetTree( $text, $flags, $xml );
	}

	// Enforce the node limit before building the DOM. Every '<' in the
	// XML is counted as a generated node.
	$parser = $this->parser;
	$parser->mGeneratedPPNodeCount += substr_count( $xml, '<' );
	$maxNodes = $parser->mOptions->getMaxGeneratedPPNodeCount();
	if ( $parser->mGeneratedPPNodeCount > $maxNodes ) {
		// if ( $cacheable ) { ... }
		throw new MWException( __METHOD__ . ': generated node count limit exceeded' );
	}

	$dom = new DOMDocument;
	Wikimedia\suppressWarnings();
	$loaded = $dom->loadXML( $xml );
	Wikimedia\restoreWarnings();

	if ( !$loaded ) {
		// Second attempt: let UtfNormal strip invalid characters first.
		$xml = UtfNormal\Validator::cleanUp( $xml );
		// 1 << 19 == XML_PARSE_HUGE, which newer libxml2 versions need in
		// order to accept XML nested more than 256 levels deep.
		$loaded = $dom->loadXML( $xml, 1 << 19 );
	}

	// if ( $cacheable ) { ... }

	if ( !$loaded ) {
		throw new MWException( __METHOD__ . ' generated invalid XML' );
	}

	return new PPNode_DOM( $dom->documentElement );
}
189 
/**
 * Preprocess wikitext and return the document tree as an XML string.
 *
 * The text is scanned once from left to right. Literal text is escaped and
 * appended to the current accumulator. Recognised constructs become
 * elements:
 *  - <ignore> for skipped tags and sections (includeonly/noinclude/
 *    onlyinclude handling, depending on Parser::PTD_FOR_INCLUSION)
 *  - <comment> for HTML comments
 *  - <ext> (with <name>, <attr>, optional <inner> and <close>) for the
 *    tags returned by the parser's getStripList()
 *  - <h level="..." i="..."> for headings
 *  - elements named by $this->rules for bracketed constructs, each with a
 *    <title> and zero or more <part> children
 *
 * Unclosed brackets are emitted as literal text at the end.
 *
 * @param string $text Wikitext to preprocess
 * @param int $flags Bit field; only Parser::PTD_FOR_INCLUSION is examined here
 * @return string XML wrapped in a <root> element
 */
public function preprocessToXml( $text, $flags = 0 ) {
	// Without this declaration the check below would read an undefined
	// local variable and always enable the '-' search character.
	global $wgDisableLangConversion;

	$forInclusion = $flags & Parser::PTD_FOR_INCLUSION;

	$xmlishElements = $this->parser->getStripList();
	$xmlishAllowMissingEndTag = [ 'includeonly', 'noinclude', 'onlyinclude' ];
	$enableOnlyinclude = false;
	if ( $forInclusion ) {
		$ignoredTags = [ 'includeonly', '/includeonly' ];
		$ignoredElements = [ 'noinclude' ];
		$xmlishElements[] = 'noinclude';
		if ( strpos( $text, '<onlyinclude>' ) !== false
			&& strpos( $text, '</onlyinclude>' ) !== false
		) {
			$enableOnlyinclude = true;
		}
	} else {
		$ignoredTags = [ 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude' ];
		$ignoredElements = [ 'includeonly' ];
		$xmlishElements[] = 'includeonly';
	}
	$xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );

	// Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
	$elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";

	$stack = new PPDStack;

	$searchBase = "[{<\n"; # }
	if ( !$wgDisableLangConversion ) {
		$searchBase .= '-';
	}

	// For fast reverse searches
	$revText = strrev( $text );
	$lengthText = strlen( $text );

	// Input pointer, starts out pointing to a pseudo-newline before the start
	$i = 0;
	// Current accumulator
	$accum =& $stack->getAccum();
	$accum = '<root>';
	// True to find equals signs in arguments
	$findEquals = false;
	// True to take notice of pipe characters
	$findPipe = false;
	$headingIndex = 1;
	// True if $i is inside a possible heading
	$inHeading = false;
	// True if there are no more greater-than (>) signs right of $i
	$noMoreGT = false;
	// Map of tag name => true if there are no more closing tags of given type right of $i
	$noMoreClosingTag = [];
	// True to ignore all input up to the next <onlyinclude>
	$findOnlyinclude = $enableOnlyinclude;
	// Do a line-start run without outputting an LF character
	$fakeLineStart = true;

	while ( true ) {
		// $this->memCheck();

		if ( $findOnlyinclude ) {
			// Ignore all input up to the next <onlyinclude>
			$startPos = strpos( $text, '<onlyinclude>', $i );
			if ( $startPos === false ) {
				// Ignored section runs to the end
				$accum .= '<ignore>' . htmlspecialchars( substr( $text, $i ) ) . '</ignore>';
				break;
			}
			$tagEndPos = $startPos + strlen( '<onlyinclude>' ); // past-the-end
			$accum .= '<ignore>' . htmlspecialchars( substr( $text, $i, $tagEndPos - $i ) ) . '</ignore>';
			$i = $tagEndPos;
			$findOnlyinclude = false;
		}

		if ( $fakeLineStart ) {
			$found = 'line-start';
			$curChar = '';
		} else {
			# Find next opening brace, closing brace or pipe
			$search = $searchBase;
			if ( $stack->top === false ) {
				$currentClosing = '';
			} else {
				$currentClosing = $stack->top->close;
				$search .= $currentClosing;
			}
			if ( $findPipe ) {
				$search .= '|';
			}
			if ( $findEquals ) {
				// First equals will be for the template
				$search .= '=';
			}
			$rule = null;
			# Output literal section, advance input counter
			$literalLength = strcspn( $text, $search, $i );
			if ( $literalLength > 0 ) {
				$accum .= htmlspecialchars( substr( $text, $i, $literalLength ) );
				$i += $literalLength;
			}
			if ( $i >= $lengthText ) {
				if ( $currentClosing == "\n" ) {
					// Do a past-the-end run to finish off the heading
					$curChar = '';
					$found = 'line-end';
				} else {
					# All done
					break;
				}
			} else {
				$curChar = $curTwoChar = $text[$i];
				if ( ( $i + 1 ) < $lengthText ) {
					$curTwoChar .= $text[$i + 1];
				}
				if ( $curChar == '|' ) {
					$found = 'pipe';
				} elseif ( $curChar == '=' ) {
					$found = 'equals';
				} elseif ( $curChar == '<' ) {
					$found = 'angle';
				} elseif ( $curChar == "\n" ) {
					if ( $inHeading ) {
						$found = 'line-end';
					} else {
						$found = 'line-start';
					}
				} elseif ( $curTwoChar == $currentClosing ) {
					$found = 'close';
					$curChar = $curTwoChar;
				} elseif ( $curChar == $currentClosing ) {
					$found = 'close';
				} elseif ( isset( $this->rules[$curTwoChar] ) ) {
					$curChar = $curTwoChar;
					$found = 'open';
					$rule = $this->rules[$curChar];
				} elseif ( isset( $this->rules[$curChar] ) ) {
					$found = 'open';
					$rule = $this->rules[$curChar];
				} else {
					# Some versions of PHP have a strcspn which stops on
					# null characters; ignore these and continue.
					# We also may get '-' and '}' characters here which
					# don't match -{ or $currentClosing. Add these to
					# output and continue.
					if ( $curChar == '-' || $curChar == '}' ) {
						$accum .= $curChar;
					}
					++$i;
					continue;
				}
			}
		}

		if ( $found == 'angle' ) {
			$matches = false;
			// Handle </onlyinclude>
			if ( $enableOnlyinclude
				&& substr( $text, $i, strlen( '</onlyinclude>' ) ) == '</onlyinclude>'
			) {
				$findOnlyinclude = true;
				continue;
			}

			// Determine element name
			if ( !preg_match( $elementsRegex, $text, $matches, 0, $i + 1 ) ) {
				// Element name missing or not listed
				$accum .= '&lt;';
				++$i;
				continue;
			}
			// Handle comments
			if ( isset( $matches[2] ) && $matches[2] == '!--' ) {
				// To avoid leaving blank lines, when a sequence of
				// space-separated comments is both preceded and followed by
				// a newline (ignoring spaces), then
				// trim leading and trailing spaces and the trailing newline.

				// Find the end
				$endPos = strpos( $text, '-->', $i + 4 );
				if ( $endPos === false ) {
					// Unclosed comment in input, runs to end
					$inner = substr( $text, $i );
					$accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
					$i = $lengthText;
				} else {
					// Search backwards for leading whitespace
					$wsStart = $i ? ( $i - strspn( $revText, " \t", $lengthText - $i ) ) : 0;

					// Search forwards for trailing whitespace
					// $wsEnd will be the position of the last space (or the '>' if there's none)
					$wsEnd = $endPos + 2 + strspn( $text, " \t", $endPos + 3 );

					// Keep looking forward as long as we're finding more
					// comments.
					$comments = [ [ $wsStart, $wsEnd ] ];
					while ( substr( $text, $wsEnd + 1, 4 ) == '<!--' ) {
						$c = strpos( $text, '-->', $wsEnd + 4 );
						if ( $c === false ) {
							break;
						}
						$c = $c + 2 + strspn( $text, " \t", $c + 3 );
						$comments[] = [ $wsEnd + 1, $c ];
						$wsEnd = $c;
					}

					// Eat the line if possible
					// TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at
					// the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but
					// it's a possible beneficial b/c break.
					if ( $wsStart > 0 && substr( $text, $wsStart - 1, 1 ) == "\n"
						&& substr( $text, $wsEnd + 1, 1 ) == "\n"
					) {
						// Remove leading whitespace from the end of the accumulator
						// Sanity check first though
						$wsLength = $i - $wsStart;
						if ( $wsLength > 0
							&& strspn( $accum, " \t", -$wsLength ) === $wsLength
						) {
							$accum = substr( $accum, 0, -$wsLength );
						}

						// Dump all but the last comment to the accumulator
						foreach ( $comments as $j => $com ) {
							$startPos = $com[0];
							$endPos = $com[1] + 1;
							if ( $j == ( count( $comments ) - 1 ) ) {
								break;
							}
							$inner = substr( $text, $startPos, $endPos - $startPos );
							$accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
						}

						// Do a line-start run next time to look for headings after the comment
						$fakeLineStart = true;
					} else {
						// No line to eat, just take the comment itself
						$startPos = $i;
						$endPos += 2;
					}

					if ( $stack->top ) {
						$part = $stack->top->getCurrentPart();
						if ( !( isset( $part->commentEnd ) && $part->commentEnd == $wsStart - 1 ) ) {
							$part->visualEnd = $wsStart;
						}
						// Else comments abutting, no change in visual end
						$part->commentEnd = $endPos;
					}
					$i = $endPos + 1;
					$inner = substr( $text, $startPos, $endPos - $startPos + 1 );
					$accum .= '<comment>' . htmlspecialchars( $inner ) . '</comment>';
				}
				continue;
			}
			$name = $matches[1];
			$lowerName = strtolower( $name );
			$attrStart = $i + strlen( $name ) + 1;

			// Find end of tag
			$tagEndPos = $noMoreGT ? false : strpos( $text, '>', $attrStart );
			if ( $tagEndPos === false ) {
				// Infinite backtrack
				// Disable tag search to prevent worst-case O(N^2) performance
				$noMoreGT = true;
				$accum .= '&lt;';
				++$i;
				continue;
			}

			// Handle ignored tags
			if ( in_array( $lowerName, $ignoredTags ) ) {
				$accum .= '<ignore>'
					. htmlspecialchars( substr( $text, $i, $tagEndPos - $i + 1 ) )
					. '</ignore>';
				$i = $tagEndPos + 1;
				continue;
			}

			$tagStartPos = $i;
			if ( $text[$tagEndPos - 1] == '/' ) {
				$attrEnd = $tagEndPos - 1;
				$inner = null;
				$i = $tagEndPos + 1;
				$close = '';
			} else {
				$attrEnd = $tagEndPos;
				// Find closing tag
				if (
					!isset( $noMoreClosingTag[$name] ) &&
					preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
						$text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 )
				) {
					$inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 );
					$i = $matches[0][1] + strlen( $matches[0][0] );
					$close = '<close>' . htmlspecialchars( $matches[0][0] ) . '</close>';
				} else {
					// No end tag
					if ( in_array( $name, $xmlishAllowMissingEndTag ) ) {
						// Let it run out to the end of the text.
						$inner = substr( $text, $tagEndPos + 1 );
						$i = $lengthText;
						$close = '';
					} else {
						// Don't match the tag, treat opening tag as literal and resume parsing.
						$i = $tagEndPos + 1;
						$accum .= htmlspecialchars( substr( $text, $tagStartPos, $tagEndPos + 1 - $tagStartPos ) );
						// Cache results, otherwise we have O(N^2) performance for input like <foo><foo><foo>...
						$noMoreClosingTag[$name] = true;
						continue;
					}
				}
			}
			// <includeonly> and <noinclude> just become <ignore> tags
			if ( in_array( $lowerName, $ignoredElements ) ) {
				$accum .= '<ignore>' . htmlspecialchars( substr( $text, $tagStartPos, $i - $tagStartPos ) )
					. '</ignore>';
				continue;
			}

			$accum .= '<ext>';
			if ( $attrEnd <= $attrStart ) {
				$attr = '';
			} else {
				$attr = substr( $text, $attrStart, $attrEnd - $attrStart );
			}
			$accum .= '<name>' . htmlspecialchars( $name ) . '</name>' .
				// Note that the attr element contains the whitespace between name and attribute,
				// this is necessary for precise reconstruction during pre-save transform.
				'<attr>' . htmlspecialchars( $attr ) . '</attr>';
			if ( $inner !== null ) {
				$accum .= '<inner>' . htmlspecialchars( $inner ) . '</inner>';
			}
			$accum .= $close . '</ext>';
		} elseif ( $found == 'line-start' ) {
			// Is this the start of a heading?
			// Line break belongs before the heading element in any case
			if ( $fakeLineStart ) {
				$fakeLineStart = false;
			} else {
				$accum .= $curChar;
				$i++;
			}

			$count = strspn( $text, '=', $i, 6 );
			if ( $count == 1 && $findEquals ) {
				// DWIM: This looks kind of like a name/value separator.
				// Let's let the equals handler have it and break the
				// potential heading. This is heuristic, but AFAICT the
				// methods for completely correct disambiguation are very
				// complex.
			} elseif ( $count > 0 ) {
				$piece = [
					'open' => "\n",
					'close' => "\n",
					'parts' => [ new PPDPart( str_repeat( '=', $count ) ) ],
					'startPos' => $i,
					'count' => $count ];
				$stack->push( $piece );
				$accum =& $stack->getAccum();
				$stackFlags = $stack->getFlags();
				if ( isset( $stackFlags['findEquals'] ) ) {
					$findEquals = $stackFlags['findEquals'];
				}
				if ( isset( $stackFlags['findPipe'] ) ) {
					$findPipe = $stackFlags['findPipe'];
				}
				if ( isset( $stackFlags['inHeading'] ) ) {
					$inHeading = $stackFlags['inHeading'];
				}
				$i += $count;
			}
		} elseif ( $found == 'line-end' ) {
			$piece = $stack->top;
			// A heading must be open, otherwise \n wouldn't have been in the search list
			// FIXME: Don't use assert()
			// phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.assert
			assert( $piece->open === "\n" );
			$part = $piece->getCurrentPart();
			// Search back through the input to see if it has a proper close.
			// Do this using the reversed string since the other solutions
			// (end anchor, etc.) are inefficient.
			$wsLength = strspn( $revText, " \t", $lengthText - $i );
			$searchStart = $i - $wsLength;
			if ( isset( $part->commentEnd ) && $searchStart - 1 == $part->commentEnd ) {
				// Comment found at line end
				// Search for equals signs before the comment
				$searchStart = $part->visualEnd;
				$searchStart -= strspn( $revText, " \t", $lengthText - $searchStart );
			}
			$count = $piece->count;
			$equalsLength = strspn( $revText, '=', $lengthText - $searchStart );
			if ( $equalsLength > 0 ) {
				if ( $searchStart - $equalsLength == $piece->startPos ) {
					// This is just a single string of equals signs on its own line
					// Replicate the doHeadings behavior /={count}(.+)={count}/
					// First find out how many equals signs there really are (don't stop at 6)
					$count = $equalsLength;
					if ( $count < 3 ) {
						$count = 0;
					} else {
						$count = min( 6, intval( ( $count - 1 ) / 2 ) );
					}
				} else {
					$count = min( $equalsLength, $count );
				}
				if ( $count > 0 ) {
					// Normal match, output <h>
					$element = "<h level=\"$count\" i=\"$headingIndex\">$accum</h>";
					$headingIndex++;
				} else {
					// Single equals sign on its own line, count=0
					$element = $accum;
				}
			} else {
				// No match, no <h>, just pass down the inner text
				$element = $accum;
			}
			// Unwind the stack
			$stack->pop();
			$accum =& $stack->getAccum();
			$stackFlags = $stack->getFlags();
			if ( isset( $stackFlags['findEquals'] ) ) {
				$findEquals = $stackFlags['findEquals'];
			}
			if ( isset( $stackFlags['findPipe'] ) ) {
				$findPipe = $stackFlags['findPipe'];
			}
			if ( isset( $stackFlags['inHeading'] ) ) {
				$inHeading = $stackFlags['inHeading'];
			}

			// Append the result to the enclosing accumulator
			$accum .= $element;
			// Note that we do NOT increment the input pointer.
			// This is because the closing linebreak could be the opening linebreak of
			// another heading. Infinite loops are avoided because the next iteration MUST
			// hit the heading open case above, which unconditionally increments the
			// input pointer.
		} elseif ( $found == 'open' ) {
			# count opening brace characters
			$curLen = strlen( $curChar );
			$count = ( $curLen > 1 ) ?
				# allow the final character to repeat
				strspn( $text, $curChar[$curLen - 1], $i + 1 ) + 1 :
				strspn( $text, $curChar, $i );

			$savedPrefix = '';
			$lineStart = ( $i > 0 && $text[$i - 1] == "\n" );

			if ( $curChar === "-{" && $count > $curLen ) {
				// -{ => {{ transition because rightmost wins
				$savedPrefix = '-';
				$i++;
				$curChar = '{';
				$count--;
				$rule = $this->rules[$curChar];
			}

			# we need to add to stack only if opening brace count is enough for one of the rules
			if ( $count >= $rule['min'] ) {
				# Add it to the stack
				$piece = [
					'open' => $curChar,
					'close' => $rule['end'],
					'savedPrefix' => $savedPrefix,
					'count' => $count,
					'lineStart' => $lineStart,
				];

				$stack->push( $piece );
				$accum =& $stack->getAccum();
				$stackFlags = $stack->getFlags();
				if ( isset( $stackFlags['findEquals'] ) ) {
					$findEquals = $stackFlags['findEquals'];
				}
				if ( isset( $stackFlags['findPipe'] ) ) {
					$findPipe = $stackFlags['findPipe'];
				}
				if ( isset( $stackFlags['inHeading'] ) ) {
					$inHeading = $stackFlags['inHeading'];
				}
			} else {
				# Add literal brace(s)
				$accum .= htmlspecialchars( $savedPrefix . str_repeat( $curChar, $count ) );
			}
			$i += $count;
		} elseif ( $found == 'close' ) {
			$piece = $stack->top;
			# lets check if there are enough characters for closing brace
			$maxCount = $piece->count;
			if ( $piece->close === '}-' && $curChar === '}' ) {
				$maxCount--; # don't try to match closing '-' as a '}'
			}
			$curLen = strlen( $curChar );
			$count = ( $curLen > 1 ) ? $curLen :
				strspn( $text, $curChar, $i, $maxCount );

			# check for maximum matching characters (if there are 5 closing
			# characters, we will probably need only 3 - depending on the rules)
			$rule = $this->rules[$piece->open];
			if ( $count > $rule['max'] ) {
				# The specified maximum exists in the callback array, unless the caller
				# has made an error
				$matchingCount = $rule['max'];
			} else {
				# Count is less than the maximum
				# Skip any gaps in the callback array to find the true largest match
				# Need to use array_key_exists not isset because the callback can be null
				$matchingCount = $count;
				while ( $matchingCount > 0 && !array_key_exists( $matchingCount, $rule['names'] ) ) {
					--$matchingCount;
				}
			}

			if ( $matchingCount <= 0 ) {
				# No matching element found in callback array
				# Output a literal closing brace and continue
				$endText = substr( $text, $i, $count );
				$accum .= htmlspecialchars( $endText );
				$i += $count;
				continue;
			}
			$name = $rule['names'][$matchingCount];
			if ( $name === null ) {
				// No element, just literal text
				$endText = substr( $text, $i, $matchingCount );
				$element = $piece->breakSyntax( $matchingCount ) . $endText;
			} else {
				# Create XML element
				# Note: $parts is already XML, does not need to be encoded further
				$parts = $piece->parts;
				$title = $parts[0]->out;
				unset( $parts[0] );

				# The invocation is at the start of the line if lineStart is set in
				# the stack, and all opening brackets are used up.
				if ( $maxCount == $matchingCount &&
					!empty( $piece->lineStart ) &&
					strlen( $piece->savedPrefix ) == 0 ) {
					$attr = ' lineStart="1"';
				} else {
					$attr = '';
				}

				$element = "<$name$attr>";
				$element .= "<title>$title</title>";
				$argIndex = 1;
				foreach ( $parts as $part ) {
					if ( isset( $part->eqpos ) ) {
						$argName = substr( $part->out, 0, $part->eqpos );
						$argValue = substr( $part->out, $part->eqpos + 1 );
						$element .= "<part><name>$argName</name>=<value>$argValue</value></part>";
					} else {
						$element .= "<part><name index=\"$argIndex\" /><value>{$part->out}</value></part>";
						$argIndex++;
					}
				}
				$element .= "</$name>";
			}

			# Advance input pointer
			$i += $matchingCount;

			# Unwind the stack
			$stack->pop();
			$accum =& $stack->getAccum();

			# Re-add the old stack element if it still has unmatched opening characters remaining
			if ( $matchingCount < $piece->count ) {
				$piece->parts = [ new PPDPart ];
				$piece->count -= $matchingCount;
				# do we still qualify for any callback with remaining count?
				$min = $this->rules[$piece->open]['min'];
				if ( $piece->count >= $min ) {
					$stack->push( $piece );
					$accum =& $stack->getAccum();
				} elseif ( $piece->count == 1 && $piece->open === '{' && $piece->savedPrefix === '-' ) {
					$piece->savedPrefix = '';
					$piece->open = '-{';
					$piece->count = 2;
					$piece->close = $this->rules[$piece->open]['end'];
					$stack->push( $piece );
					$accum =& $stack->getAccum();
				} else {
					$s = substr( $piece->open, 0, -1 );
					$s .= str_repeat(
						substr( $piece->open, -1 ),
						$piece->count - strlen( $s )
					);
					$accum .= $piece->savedPrefix . $s;
				}
			} elseif ( $piece->savedPrefix !== '' ) {
				$accum .= $piece->savedPrefix;
			}

			$stackFlags = $stack->getFlags();
			if ( isset( $stackFlags['findEquals'] ) ) {
				$findEquals = $stackFlags['findEquals'];
			}
			if ( isset( $stackFlags['findPipe'] ) ) {
				$findPipe = $stackFlags['findPipe'];
			}
			if ( isset( $stackFlags['inHeading'] ) ) {
				$inHeading = $stackFlags['inHeading'];
			}

			# Add XML element to the enclosing accumulator
			$accum .= $element;
		} elseif ( $found == 'pipe' ) {
			$findEquals = true; // shortcut for getFlags()
			$stack->addPart();
			$accum =& $stack->getAccum();
			++$i;
		} elseif ( $found == 'equals' ) {
			$findEquals = false; // shortcut for getFlags()
			$stack->getCurrentPart()->eqpos = strlen( $accum );
			$accum .= '=';
			++$i;
		}
	}

	# Output any remaining unclosed brackets
	foreach ( $stack->stack as $piece ) {
		$stack->rootAccum .= $piece->breakSyntax();
	}
	$stack->rootAccum .= '</root>';
	$xml = $stack->rootAccum;

	return $xml;
}
827 }
cacheGetTree( $text, $flags)
Attempt to load a precomputed document tree for some given wikitext from the cache.
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such and we might be restricted by PHP settings such as safe mode or open_basedir We cannot assume that the software even has read access anywhere useful Many shared hosts run all users web applications under the same so they can t rely on Unix and must forbid reads to even standard directories like tmp lest users read each others files We cannot assume that the user has the ability to install or run any programs not written as web accessible PHP scripts Since anything that works on cheap shared hosting will work if you have shell or root access MediaWiki s design is based around catering to the lowest common denominator Although we support higher end setups as the way many things work by default is tailored toward shared hosting These defaults are unconventional from the point of view of and they certainly aren t ideal for someone who s installing MediaWiki as root
Stack class to help Preprocessor::preprocessToObj()
Definition: PPDStack.php:26
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:187
The index of the header message $result[1]=The index of the body text message $result[2 through n]=Parameters passed to body text message. Please note the header message cannot receive/use parameters. 'ImportHandleLogItemXMLTag':When parsing a XML tag in a log item. Return false to stop further processing of the tag $reader:XMLReader object $logInfo:Array of information 'ImportHandlePageXMLTag':When parsing a XML tag in a page. Return false to stop further processing of the tag $reader:XMLReader object & $pageInfo:Array of information 'ImportHandleRevisionXMLTag':When parsing a XML tag in a page revision. Return false to stop further processing of the tag $reader:XMLReader object $pageInfo:Array of page information $revisionInfo:Array of revision information 'ImportHandleToplevelXMLTag':When parsing a top level XML tag. Return false to stop further processing of the tag $reader:XMLReader object 'ImportHandleUnknownUser':When a user doesn 't exist locally, this hook is called to give extensions an opportunity to auto-create it. If the auto-creation is successful, return false. $name:User name 'ImportHandleUploadXMLTag':When parsing a XML tag in a file upload. Return false to stop further processing of the tag $reader:XMLReader object $revisionInfo:Array of information 'ImportLogInterwikiLink':Hook to change the interwiki link used in log entries and edit summaries for transwiki imports. & $fullInterwikiPrefix:Interwiki prefix, may contain colons. & $pageTitle:String that contains page title. 'ImportSources':Called when reading from the $wgImportSources configuration variable. Can be used to lazy-load the import sources list. & $importSources:The value of $wgImportSources. Modify as necessary. See the comment in DefaultSettings.php for the detail of how to structure this array. 'InfoAction':When building information to display on the action=info page. 
$context:IContextSource object & $pageInfo:Array of information 'InitializeArticleMaybeRedirect':MediaWiki check to see if title is a redirect. & $title:Title object for the current page & $request:WebRequest & $ignoreRedirect:boolean to skip redirect check & $target:Title/string of redirect target & $article:Article object 'InternalParseBeforeLinks':during Parser 's internalParse method before links but after nowiki/noinclude/includeonly/onlyinclude and other processings. & $parser:Parser object & $text:string containing partially parsed text & $stripState:Parser 's internal StripState object 'InternalParseBeforeSanitize':during Parser 's internalParse method just before the parser removes unwanted/dangerous HTML tags and after nowiki/noinclude/includeonly/onlyinclude and other processings. Ideal for syntax-extensions after template/parser function execution which respect nowiki and HTML-comments. & $parser:Parser object & $text:string containing partially parsed text & $stripState:Parser 's internal StripState object 'InterwikiLoadPrefix':When resolving if a given prefix is an interwiki or not. Return true without providing an interwiki to continue interwiki search. $prefix:interwiki prefix we are looking for. & $iwData:output array describing the interwiki with keys iw_url, iw_local, iw_trans and optionally iw_api and iw_wikiid. 'InvalidateEmailComplete':Called after a user 's email has been invalidated successfully. $user:user(object) whose email is being invalidated 'IRCLineURL':When constructing the URL to use in an IRC notification. Callee may modify $url and $query, URL will be constructed as $url . 
$query & $url:URL to index.php & $query:Query string $rc:RecentChange object that triggered url generation 'IsFileCacheable':Override the result of Article::isFileCacheable()(if true) & $article:article(object) being checked 'IsTrustedProxy':Override the result of IP::isTrustedProxy() & $ip:IP being check & $result:Change this value to override the result of IP::isTrustedProxy() 'IsUploadAllowedFromUrl':Override the result of UploadFromUrl::isAllowedUrl() $url:URL used to upload from & $allowed:Boolean indicating if uploading is allowed for given URL 'isValidEmailAddr':Override the result of Sanitizer::validateEmail(), for instance to return false if the domain name doesn 't match your organization. $addr:The e-mail address entered by the user & $result:Set this and return false to override the internal checks 'isValidPassword':Override the result of User::isValidPassword() $password:The password entered by the user & $result:Set this and return false to override the internal checks $user:User the password is being validated for 'Language::getMessagesFileName':$code:The language code or the language we 're looking for a messages file for & $file:The messages file path, you can override this to change the location. 'LanguageGetNamespaces':Provide custom ordering for namespaces or remove namespaces. Do not use this hook to add namespaces. Use CanonicalNamespaces for that. & $namespaces:Array of namespaces indexed by their numbers 'LanguageGetTranslatedLanguageNames':Provide translated language names. & $names:array of language code=> language name $code:language of the preferred translations 'LanguageLinks':Manipulate a page 's language links. This is called in various places to allow extensions to define the effective language links for a page. $title:The page 's Title. & $links:Array with elements of the form "language:title" in the order that they will be output. & $linkFlags:Associative array mapping prefixed links to arrays of flags. 
Currently unused, but planned to provide support for marking individual language links in the UI, e.g. for featured articles. 'LanguageSelector':Hook to change the language selector available on a page. $out:The output page. $cssClassName:CSS class name of the language selector. 'LinkBegin':DEPRECATED since 1.28! Use HtmlPageLinkRendererBegin instead. Used when generating internal and interwiki links in Linker::link(), before processing starts. Return false to skip default processing and return $ret. See documentation for Linker::link() for details on the expected meanings of parameters. $skin:the Skin object $target:the Title that the link is pointing to & $html:the contents that the< a > tag should have(raw HTML) $result
Definition: hooks.txt:1980
if( $line===false) $args
Definition: cdb.php:64
Expansion frame with custom arguments.
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not null
Definition: hooks.txt:780
An expansion frame, used as a context to expand the result of preprocessToObj()
Definition: PPFrame_DOM.php:28
const PTD_FOR_INCLUSION
Definition: Parser.php:108
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
$wgDisableLangConversion
Whether to enable language variant conversion.
array cacheSetTree( $text, $flags, $tree)
Store a document tree in the cache.
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
preprocessToObj( $text, $flags=0)
Preprocess some wikitext and return the document tree.
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:271
newPartNodeArray( $values)
within a display generated by the Derivative if and wherever such third party notices normally appear The contents of the NOTICE file are for informational purposes only and do not modify the License You may add Your own attribution notices within Derivative Works that You alongside or as an addendum to the NOTICE text from the provided that such additional attribution notices cannot be construed as modifying the License You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for or distribution of Your or for any such Derivative Works as a provided Your and distribution of the Work otherwise complies with the conditions stated in this License Submission of Contributions Unless You explicitly state any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this without any additional terms or conditions Notwithstanding the nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions Trademarks This License does not grant permission to use the trade names
preprocessToXml( $text, $flags=0)
$matches