MediaWiki  1.23.1
Tidy.php
Go to the documentation of this file.
1 <?php
37 
/**
 * Map from placeholder marker to the original markup it stands for.
 * Null until getWrapped() creates it.
 * @var ReplacementArray|null
 */
protected $mTokens;

/**
 * Random prefix shared by every placeholder marker of the current run.
 * Null until getWrapped() generates it.
 * @var string|null
 */
protected $mUniqPrefix;

/**
 * Sequence number given to the next placeholder marker; getWrapped()
 * resets it to 0.
 * @var int
 */
protected $mMarkerIndex;

/**
 * Create a wrapper with no token map and no marker prefix yet; both are
 * set up by getWrapped().
 */
public function __construct() {
	$this->mTokens = $this->mUniqPrefix = null;
}
51 
/**
 * Prepare an HTML fragment for Tidy.
 *
 * Starts a fresh placeholder run (new token map, new random prefix, marker
 * counter back at 0), hides <mw:editsection> elements and <mw:toc> tags
 * behind placeholder markers, renames <link>/<meta> to
 * <html-link>/<html-meta>, and wraps the result in an XHTML 1.0
 * Transitional document skeleton.
 *
 * @param string $text HTML fragment to wrap
 * @return string Complete document to feed to Tidy
 */
public function getWrapped( $text ) {
	$this->mTokens = new ReplacementArray;
	$this->mUniqPrefix = "\x7fUNIQ" .
		dechex( mt_rand( 0, 0x7fffffff ) ) . dechex( mt_rand( 0, 0x7fffffff ) );
	$this->mMarkerIndex = 0;

	// Objects are passed as handles, so the callable does not need a
	// reference to $this. The old "&$this" form is a PHP 4 leftover, and
	// PHP 7.1 tightened how references to $this are treated.
	$callback = array( $this, 'replaceCallback' );

	// Replace <mw:editsection> elements with placeholders
	$wrappedtext = preg_replace_callback( ParserOutput::EDITSECTION_REGEX,
		$callback, $text );
	// ...and <mw:toc> markers
	$wrappedtext = preg_replace_callback( '/<\\/?mw:toc>/',
		$callback, $wrappedtext );

	// Modify inline Microdata <link> and <meta> elements so they say <html-link> and <html-meta> so
	// we can trick Tidy into not stripping them out by including them in tidy's new-empty-tags config
	$wrappedtext = preg_replace( '!<(link|meta)([^>]*?)(/{0,1}>)!', '<html-$1$2$3', $wrappedtext );

	// Wrap the whole thing in a doctype and body for Tidy.
	$wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"' .
		' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>' .
		'<head><title>test</title></head><body>' . $wrappedtext . '</body></html>';

	return $wrappedtext;
}
80 
/**
 * preg_replace_callback() handler that swaps one match for a placeholder.
 *
 * Builds the next marker from the run prefix and the marker counter,
 * records the matched text under that marker in the token map, and
 * advances the counter.
 *
 * @param array $m Regex match; only the full match $m[0] is used
 * @return string Placeholder marker that replaces the match
 */
public function replaceCallback( $m ) {
	$placeholder = $this->mUniqPrefix . '-item-' . $this->mMarkerIndex . Parser::MARKER_SUFFIX;
	++$this->mMarkerIndex;

	$this->mTokens->setPair( $placeholder, $m[0] );

	return $placeholder;
}
92 
/**
 * Undo the substitutions made before Tidy ran.
 *
 * @param string $text Tidy output
 * @return string Text with <html-link>/<html-meta> renamed back to
 *   <link>/<meta> and every placeholder marker replaced by the markup
 *   stored for it in the token map
 */
public function postprocess( $text ) {
	// Turn <html-link> / <html-meta> back into plain <link> / <meta>.
	$restored = preg_replace( '!<html-(link|meta)([^>]*?)(/{0,1}>)!', '<$1$2$3', $text );

	// Put the hidden markup back in place of its placeholder tokens.
	return $this->mTokens->replace( $restored );
}
106 
107 }
108 
/**
 * Class to interact with HTML tidy.
 *
 * Runs Tidy either in-process through the PHP "tidy" class or by spawning
 * the external binary configured in $wgTidyBin, depending on
 * $wgTidyInternal.
 */
class MWTidy {
	/**
	 * Clean up an HTML fragment with Tidy.
	 *
	 * The text is wrapped by MWTidyWrapper, passed through the internal or
	 * external Tidy backend, and then unwrapped again.
	 *
	 * @param string $text HTML fragment to clean up
	 * @return string Corrected text. If the backend reports a negative
	 *   status, or returns no output, the original text is returned with an
	 *   explanatory HTML comment appended.
	 */
	public static function tidy( $text ) {
		global $wgTidyInternal;

		$wrapper = new MWTidyWrapper;
		$wrappedtext = $wrapper->getWrapped( $text );

		$retVal = null;
		if ( $wgTidyInternal ) {
			$correctedtext = self::execInternalTidy( $wrappedtext, false, $retVal );
		} else {
			$correctedtext = self::execExternalTidy( $wrappedtext, false, $retVal );
		}

		if ( $retVal < 0 ) {
			// Negative status: the backend could not be started or loaded.
			wfDebug( "Possible tidy configuration error!\n" );
			return $text . "\n<!-- Tidy was unable to run -->\n";
		} elseif ( is_null( $correctedtext ) ) {
			// The backend ran but produced no usable output.
			wfDebug( "Tidy error detected!\n" );
			return $text . "\n<!-- Tidy found serious XHTML errors -->\n";
		}

		$correctedtext = $wrapper->postprocess( $correctedtext ); // restore any hidden tokens

		return $correctedtext;
	}

	/**
	 * Check HTML for errors by running Tidy in error-reporting mode.
	 *
	 * @param string $text HTML to check
	 * @param string &$errorStr Receives the error output of the backend
	 * @return bool True when the backend status is 0, or when the status is
	 *   negative and no error output was produced
	 */
	public static function checkErrors( $text, &$errorStr = null ) {
		global $wgTidyInternal;

		$retval = 0;
		if ( $wgTidyInternal ) {
			$errorStr = self::execInternalTidy( $text, true, $retval );
		} else {
			$errorStr = self::execExternalTidy( $text, true, $retval );
		}

		return ( $retval < 0 && $errorStr == '' ) || $retval == 0;
	}

	/**
	 * Spawn an external HTML tidy process and get corrected markup back
	 * from it.
	 *
	 * @param string $text HTML to send to the process on stdin
	 * @param bool $stderr True to capture the process's stderr (stdout is
	 *   discarded); false to capture stdout (stderr is discarded)
	 * @param int &$retval Receives the value of proc_close(), or -1 when
	 *   the process could not be started
	 * @return string|null Captured output. Null when stdout was requested
	 *   and nothing came back for non-empty input.
	 */
	private static function execExternalTidy( $text, $stderr = false, &$retval = null ) {
		global $wgTidyConf, $wgTidyBin, $wgTidyOpts;
		wfProfileIn( __METHOD__ );

		$cleansource = '';
		$opts = ' -utf8';

		// Only the stream we want to read is a pipe; the other output
		// stream is sent to the null file.
		if ( $stderr ) {
			$descriptorspec = array(
				0 => array( 'pipe', 'r' ),
				1 => array( 'file', wfGetNull(), 'a' ),
				2 => array( 'pipe', 'w' )
			);
		} else {
			$descriptorspec = array(
				0 => array( 'pipe', 'r' ),
				1 => array( 'pipe', 'w' ),
				2 => array( 'file', wfGetNull(), 'a' )
			);
		}

		$readpipe = $stderr ? 2 : 1;
		$pipes = array();

		$process = proc_open(
			"$wgTidyBin -config $wgTidyConf $wgTidyOpts$opts", $descriptorspec, $pipes );

		//NOTE: At least on linux, the process will be created even if tidy is not installed.
		//      This means that missing tidy will be treated as a validation failure.

		if ( is_resource( $process ) ) {
			// Theoretically, this style of communication could cause a deadlock
			// here. If the stdout buffer fills up, then writes to stdin could
			// block. This doesn't appear to happen with tidy, because tidy only
			// writes to stdout after it's finished reading from stdin. Search
			// for tidyParseStdin and tidySaveStdout in console/tidy.c
			fwrite( $pipes[0], $text );
			fclose( $pipes[0] );

			// Read the selected stream to EOF in a single call. This replaces a
			// manual feof()/fgets() loop, which can spin forever if the
			// stream enters an error state and never reports EOF.
			$output = stream_get_contents( $pipes[$readpipe] );
			if ( $output !== false ) {
				$cleansource = $output;
			}

			fclose( $pipes[$readpipe] );
			$retval = proc_close( $process );
		} else {
			wfWarn( "Unable to start external tidy process" );
			$retval = -1;
		}

		if ( !$stderr && $cleansource == '' && $text != '' ) {
			// Some kind of error happened, so we couldn't get the corrected text.
			// Just give up; we'll use the source text and append a warning.
			$cleansource = null;
		}

		wfProfileOut( __METHOD__ );
		return $cleansource;
	}

	/**
	 * Use the PHP tidy class to run Tidy in-process instead of spawning a
	 * new process.
	 *
	 * @param string $text HTML to parse
	 * @param bool $stderr True to return Tidy's error buffer instead of the
	 *   repaired markup
	 * @param int &$retval Receives the Tidy status, or -1 when the tidy
	 *   class is not available
	 * @return string|null Error buffer or repaired markup. Null when the
	 *   tidy class is missing or Tidy reports status 2.
	 */
	private static function execInternalTidy( $text, $stderr = false, &$retval = null ) {
		global $wgTidyConf, $wgDebugTidy;
		wfProfileIn( __METHOD__ );

		if ( !class_exists( 'tidy' ) ) {
			wfWarn( "Unable to load internal tidy class." );
			$retval = -1;

			wfProfileOut( __METHOD__ );
			return null;
		}

		$tidy = new tidy;
		$tidy->parseString( $text, $wgTidyConf, 'utf8' );

		if ( $stderr ) {
			// Error-reporting mode: no repair pass, just status and messages.
			$retval = $tidy->getStatus();

			wfProfileOut( __METHOD__ );
			return $tidy->errorBuffer;
		}

		$tidy->cleanRepair();
		$retval = $tidy->getStatus();
		if ( $retval == 2 ) {
			// 2 is magic number for fatal error
			// http://www.php.net/manual/en/function.tidy-get-status.php
			$cleansource = null;
		} else {
			$cleansource = tidy_get_output( $tidy );
			if ( $wgDebugTidy && $retval > 0 ) {
				// Append Tidy's report as an HTML comment; "-->" is escaped
				// so the report cannot close the comment early.
				$cleansource .= "<!--\nTidy reports:\n" .
					str_replace( '-->', '--&gt;', $tidy->errorBuffer ) .
					"\n-->";
			}
		}

		wfProfileOut( __METHOD__ );
		return $cleansource;
	}
}
php
skin txt MediaWiki includes four core it has been set as the default in MediaWiki since the replacing Monobook it had been the default skin since before being replaced by Vector largely rewritten in while keeping its appearance Several legacy skins were removed in the as the burden of supporting them became too heavy to bear Those in etc for skin dependent CSS etc for skin dependent JavaScript These can also be customised on a per user by etc This feature has led to a wide variety of user styles becoming that gallery is a good place to ending in php
Definition: skin.txt:62
MWTidyWrapper\$mTokens
ReplacementArray $mTokens
Definition: Tidy.php:40
wfProfileIn
wfProfileIn( $functionname)
Begin profiling of a function.
Definition: Profiler.php:33
MWTidyWrapper\__construct
__construct()
Definition: Tidy.php:46
MWTidyWrapper\postprocess
postprocess( $text)
Definition: Tidy.php:96
ParserOutput\EDITSECTION_REGEX
const EDITSECTION_REGEX
Definition: ParserOutput.php:60
wfProfileOut
wfProfileOut( $functionname='missing')
Stop profiling of a function.
Definition: Profiler.php:46
array
the array() calling protocol came about after MediaWiki 1.4rc1.
List of Api Query prop modules.
MWTidy\execInternalTidy
static execInternalTidy( $text, $stderr=false, &$retval=null)
Use the HTML tidy extension to use the tidy library in-process, saving the overhead of spawning a new...
Definition: Tidy.php:248
MWTidy
Class to interact with HTML tidy.
Definition: Tidy.php:117
global
when a variable name is used in a it is silently declared as a new masking the global
Definition: design.txt:93
wfDebug
wfDebug( $text, $dest='all')
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:933
ReplacementArray
Replacement array for FSS with fallback to strtr() Supports lazy initialisation of FSS resource.
Definition: StringUtils.php:411
MWTidy\execExternalTidy
static execExternalTidy( $text, $stderr=false, &$retval=null)
Spawn an external HTML tidy process and get corrected markup back from it.
Definition: Tidy.php:181
MWTidyWrapper
Class used to hide mw:editsection tokens from Tidy so that it doesn't break them or break on them.
Definition: Tidy.php:36
wfGetNull
wfGetNull()
Get a platform-independent path to the null file, e.g. /dev/null
Definition: GlobalFunctions.php:3780
MWTidyWrapper\getWrapped
getWrapped( $text)
Definition: Tidy.php:55
MWTidyWrapper\$mMarkerIndex
$mMarkerIndex
Definition: Tidy.php:44
wfWarn
wfWarn( $msg, $callerOffset=1, $level=E_USER_NOTICE)
Send a warning either to the debug log or in a PHP error depending on $wgDevelopmentWarnings.
Definition: GlobalFunctions.php:1141
MWTidyWrapper\$mUniqPrefix
$mUniqPrefix
Definition: Tidy.php:42
MWTidyWrapper\replaceCallback
replaceCallback( $m)
Definition: Tidy.php:85
MWTidy\checkErrors
static checkErrors( $text, &$errorStr=null)
Check HTML for errors, used if $wgValidateAllHtml = true.
Definition: Tidy.php:159
$retval
please add to it if you re going to add events to the MediaWiki code where normally authentication against an external auth plugin would be creating a account incomplete not yet checked for validity & $retval
Definition: hooks.txt:237
MWTidy\tidy
static tidy( $text)
Interface with html tidy, used if $wgUseTidy = true.
Definition: Tidy.php:126