Go to the documentation of this file.
44 require_once __DIR__ .
'/../Maintenance.php';
59 parent::__construct();
60 $this->mDescription =
'Compress the text of a wiki';
61 $this->
addOption(
'type',
'Set compression type to either: gzip|concat',
false,
true,
't' );
62 $this->
addOption(
'chunksize',
'Maximum number of revisions in a concat chunk',
false,
true,
'c' );
63 $this->
addOption(
'begin-date',
'Earliest date to check for uncompressed revisions',
false,
true,
'b' );
64 $this->
addOption(
'end-date',
'Latest revision date to compress',
false,
true,
'e' );
65 $this->
addOption(
'startid',
'The id to start from (gzip -> text table, concat -> page table)',
false,
true,
's' );
66 $this->
addOption(
'extdb',
'Store specified revisions in an external cluster (untested)',
false,
true );
67 $this->
addOption(
'endid',
'The page_id to stop at (only when using concat compression type)',
false,
true,
'n' );
72 if ( !function_exists(
"gzdeflate" ) ) {
73 $this->
error(
"You must enable zlib support in PHP to compress old revisions!\n" .
74 "Please see http://www.php.net/manual/en/ref.zlib.php\n",
true );
78 $chunkSize = $this->
getOption(
'chunksize', 20 );
79 $startId = $this->
getOption(
'startid', 0 );
80 $beginDate = $this->
getOption(
'begin-date',
'' );
81 $endDate = $this->
getOption(
'end-date',
'' );
83 $endId = $this->
getOption(
'endid',
false );
86 $this->
error(
"Type \"{$type}\" not supported" );
90 $this->
output(
"Compressing database {$wgDBname} to external cluster {$extDB}\n"
91 . str_repeat(
'-', 76 ) .
"\n\n" );
93 $this->
output(
"Compressing database {$wgDBname}\n"
94 . str_repeat(
'-', 76 ) .
"\n\n" );
98 if (
$type ==
'concat' ) {
100 $endDate, $extDB, $endId );
106 $this->
output(
"Done.\n" );
113 $this->
output(
"Starting from old_id $start...\n" );
116 $res = $dbw->select(
'text',
array(
'old_id',
'old_flags',
'old_text' ),
117 "old_id>=$start", __METHOD__,
array(
'ORDER BY' =>
'old_id',
'LIMIT' => $chunksize,
'FOR UPDATE' ) );
118 if (
$res->numRows() == 0 ) {
122 foreach (
$res as $row ) {
123 # print " {$row->old_id} - {$row->old_namespace}:{$row->old_title}\n";
125 $last = $row->old_id;
127 $start =
$last + 1; # Deletion may leave
long empty stretches
128 $this->
output(
"$start...\n" );
139 if (
false !== strpos( $row->old_flags,
'gzip' ) ||
false !== strpos( $row->old_flags,
'object' ) ) {
140 #print "Already compressed row {$row->old_id}\n";
144 $flags = $row->old_flags ?
"{$row->old_flags},gzip" :
"gzip";
145 $compress = gzdeflate( $row->old_text );
147 # Store in external storage if required
148 if ( $extdb !==
'' ) {
150 $compress = $storeObj->
store( $extdb, $compress );
151 if ( $compress ===
false ) {
152 $this->
error(
"Unable to store object" );
158 $dbw->update(
'text',
161 'old_text' => $compress
163 'old_id' => $row->old_id
165 array(
'LIMIT' => 1 )
180 $endDate, $extdb =
"", $maxPageId =
false
187 # Set up external storage
188 if ( $extdb !=
'' ) {
192 # Get all articles by page_id
194 $maxPageId =
$dbr->selectField(
'page',
'max(page_id)',
'', __METHOD__ );
196 $this->
output(
"Starting from $startId of $maxPageId\n" );
197 $pageConds =
array();
209 # For each article, get a list of revisions which fit the criteria
211 # No recompression, use a condition on old_flags
212 # Don't compress object type entities, because that might produce data loss when
213 # overwriting bulk storage concat rows. Don't compress external references, because
214 # the script doesn't yet delete rows from external storage.
216 'old_flags NOT ' .
$dbr->buildLike(
$dbr->anyString(),
'object',
$dbr->anyString() ) .
' AND old_flags NOT '
217 .
$dbr->buildLike(
$dbr->anyString(),
'external',
$dbr->anyString() ) );
220 if ( !preg_match(
'/^\d{14}$/', $beginDate ) ) {
221 $this->
error(
"Invalid begin date \"$beginDate\"\n" );
224 $conds[] =
"rev_timestamp>'" . $beginDate .
"'";
227 if ( !preg_match(
'/^\d{14}$/', $endDate ) ) {
228 $this->
error(
"Invalid end date \"$endDate\"\n" );
231 $conds[] =
"rev_timestamp<'" . $endDate .
"'";
233 if ( $loadStyle == self::LS_CHUNKED ) {
235 $fields =
array(
'rev_id',
'rev_text_id',
'old_flags',
'old_text' );
236 $conds[] =
'rev_text_id=old_id';
237 $revLoadOptions =
'FOR UPDATE';
240 $fields =
array(
'rev_id',
'rev_text_id' );
241 $revLoadOptions =
array();
244 # Don't work with current revisions
245 # Don't lock the page table for update either -- TS 2006-04-04
247 #$conds[] = 'page_id=rev_page AND rev_id != page_latest';
249 for ( $pageId = $startId; $pageId <= $maxPageId; $pageId++ ) {
256 $pageRes =
$dbr->select(
'page',
257 array(
'page_id',
'page_namespace',
'page_title',
'page_latest' ),
258 $pageConds +
array(
'page_id' => $pageId ), __METHOD__ );
259 if ( $pageRes->numRows() == 0 ) {
262 $pageRow =
$dbr->fetchObject( $pageRes );
265 $titleObj =
Title::makeTitle( $pageRow->page_namespace, $pageRow->page_title );
266 $this->
output(
"$pageId\t" . $titleObj->getPrefixedDBkey() .
" " );
269 $revRes = $dbw->select(
$tables, $fields,
271 'rev_page' => $pageRow->page_id,
272 # Don
't operate on the current revision
273 # Use < instead of <> in case the current revision has changed
274 # since the page select, which wasn't locking
275 'rev_id < ' . $pageRow->page_latest
281 foreach ( $revRes
as $revRow ) {
285 if ( count( $revs ) < 2 ) {
286 # No revisions matching, no further processing
293 while ( $i < count( $revs ) ) {
294 if ( $i < count( $revs ) - $maxChunkSize ) {
295 $thisChunkSize = $maxChunkSize;
297 $thisChunkSize = count( $revs ) - $i;
302 $dbw->begin( __METHOD__ );
304 $primaryOldid = $revs[$i]->rev_text_id;
306 # Get the text of each revision and add it to the object
307 for ( $j = 0; $j < $thisChunkSize && $chunk->isHappy(); $j++ ) {
308 $oldid = $revs[$i + $j]->rev_text_id;
311 if ( $loadStyle == self::LS_INDIVIDUAL ) {
312 $textRow = $dbw->selectRow(
'text',
313 array(
'old_flags',
'old_text' ),
314 array(
'old_id' => $oldid ),
323 if ( $text ===
false ) {
324 $this->
error(
"\nError, unable to get text in old_id $oldid" );
325 #$dbw->delete( 'old', array( 'old_id' => $oldid ) );
328 if ( $extdb ==
"" && $j == 0 ) {
329 $chunk->setText( $text );
332 # Don't make a stub if it's going to be longer than the article
333 # Stubs are typically about 100 bytes
334 if ( strlen( $text ) < 120 ) {
340 $stub->setReferrer( $oldid );
349 # If we couldn't actually use any stubs because the pages were too small, do nothing
351 if ( $extdb !=
"" ) {
352 # Move blob objects to External Storage
353 $stored = $storeObj->store( $extdb, serialize( $chunk ));
354 if ( $stored ===
false ) {
355 $this->
error(
"Unable to store object" );
358 # Store External Storage URLs instead of Stub placeholders
359 foreach ( $stubs
as $stub ) {
360 if ( $stub ===
false ) {
363 # $stored should provide base path to a BLOB
364 $url = $stored .
"/" . $stub->getHash();
365 $dbw->update(
'text',
368 'old_flags' =>
'external,utf-8',
370 'old_id' => $stub->getReferrer(),
375 # Store the main object locally
376 $dbw->update(
'text',
378 'old_text' => serialize( $chunk ),
379 'old_flags' =>
'object,utf-8',
381 'old_id' => $primaryOldid
385 # Store the stub objects
386 for ( $j = 1; $j < $thisChunkSize; $j++ ) {
387 # Skip if not compressing and don't overwrite the first revision
388 if ( $stubs[$j] !==
false && $revs[$i + $j]->rev_text_id != $primaryOldid ) {
389 $dbw->update(
'text',
391 'old_text' => serialize( $stubs[$j] ),
392 'old_flags' =>
'object,utf-8',
394 'old_id' => $revs[$i + $j]->rev_text_id
403 $dbw->commit( __METHOD__ );
404 $i += $thisChunkSize;
static & makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
skin txt MediaWiki includes four core it has been set as the default in MediaWiki since the replacing Monobook it had been been the default skin since before being replaced by Vector largely rewritten in while keeping its appearance Several legacy skins were removed in the as the burden of supporting them became too heavy to bear Those in etc for skin dependent CSS etc for skin dependent JavaScript These can also be customised on a per user by etc This feature has led to a wide variety of user styles becoming that gallery is a good place to ending in php
DB accessible external objects.
namespace and then decline to actually register it RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist & $tables
Pointer object for an item within a CGZ blob stored in the text table.
compressWithConcat( $startId, $maxChunkSize, $beginDate, $endDate, $extdb="", $maxPageId=false)
& wfGetDB( $db, $groups=array(), $wiki=false)
Get a Database object.
Concatenated gzip (CGZ) storage. Improves compression ratio by concatenating like objects before gzipp...
addOption( $name, $description, $required=false, $withArg=false, $shortName=false)
Add a parameter to the script.
require_once RUN_MAINTENANCE_IF_MAIN
__construct()
Default constructor.
static getRevisionText( $row, $prefix='old_', $wiki=false)
Get revision text associated with an old or archive row. $row is usually an object from wfFetchRow(),...
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
it is the revision text itself. In either case, if 'gzip' is set in $flags, the revision text is gzipped.
execute()
Do the actual work.
controlled by $wgMainCacheType controlled by $wgParserCacheType controlled by $wgMessageCacheType If you set CACHE_NONE to one of the three control default value for MediaWiki still create a but requests to it are no ops and we always fall through to the database If the cache daemon can t be it should also disable itself fairly smoothly By $wgMemc is used but when it is $parserMemc or $messageMemc this is mentioned $wgDBname
compressOldPages( $start=0, $extdb='')
compressPage( $row, $extdb)
the array() calling protocol came about after MediaWiki 1.4rc1.
List of Api Query prop modules.
when a variable name is used in a function, it is silently declared as a new local variable, masking the global.
wfWaitForSlaves( $maxLag=false, $wiki=false, $cluster=false)
Modern version of wfWaitForSlaves().
Maintenance script that compresses the text of a wiki.
setLocation( $id)
Sets the location (old_id) of the main object to which this object points.
getOption( $name, $default=null)
Get an option, or return the default.
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
error( $err, $die=0)
Throw an error to the user.
output( $out, $channel=null)
Throw some output to the user.