MediaWiki  1.23.2
convertLinks.php
Go to the documentation of this file.
1 <?php
24 require_once __DIR__ . '/Maintenance.php';
25 
/**
 * Maintenance script that converts the old `links` table, whose l_from column
 * holds page titles, into the ID->ID schema.
 *
 * The converted rows are written to `links_temp`. Unless --keep-links-table is
 * given, the old table is then moved to `links_backup` and `links_temp` takes
 * its place. The wiki should be read-only while this runs.
 */
class ConvertLinks extends Maintenance {
	/** @var bool Whether timing information is written to the performance log */
	private $logPerformance;

	/**
	 * Describe the script and register its command-line options.
	 *
	 * All four parameters are named switches (they are read back with
	 * hasOption()/getOption()), so they are registered as options rather than
	 * positional arguments.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Convert from the old links schema (string->ID) to the new schema (ID->ID).\n" .
			"The wiki should be put into read-only mode while this script executes";

		$this->addOption( 'logperformance', "Log performance to perfLogFilename." );
		// Fourth argument: this option takes a value (the file name).
		$this->addOption(
			'perfLogFilename',
			"Filename where performance is logged if --logperformance was set (defaults to 'convLinksPerf.txt').",
			false,
			true
		);
		$this->addOption(
			'keep-links-table',
			"Don't overwrite the old links table with the new one, leave the new table at links_temp."
		);
		$this->addOption(
			'nokeys',
			"Don't create keys, and so allow duplicates in the new links table.\n\n" .
				"This gives a huge speed improvement for very large links tables which are MyISAM."
				/* (What about InnoDB?) */
		);
	}

	/**
	 * @return int Maintenance::DB_ADMIN, the database access level this script asks for
	 */
	public function getDbType() {
		return Maintenance::DB_ADMIN;
	}

	/**
	 * Run the conversion.
	 *
	 * Steps: bail out if the conversion is unnecessary or already done; build
	 * a title => cur_id map from the cur table; copy the links table into
	 * links_temp in chunks, replacing each l_from title with its ID; finally
	 * (unless --keep-links-table) swap links_temp in for links.
	 */
	public function execute() {
		// Needed to turn a namespace number into its title prefix below.
		global $wgContLang;

		$dbw = wfGetDB( DB_MASTER );

		$type = $dbw->getType();
		if ( $type != 'mysql' ) {
			$this->output( "Link table conversion not necessary for $type\n" );
			return;
		}

		$numBadLinks = $curRowsRead = 0; # counters etc
		$totalTuplesInserted = 0; # total tuples INSERTed into links_temp

		$reportCurReadProgress = true; # whether or not to give progress reports while reading IDs from cur table
		$curReadReportInterval = 1000; # number of rows between progress reports

		$reportLinksConvProgress = true; # whether or not to give progress reports during conversion
		$linksConvInsertInterval = 1000; # number of rows per INSERT

		$initialRowOffset = 0;
		# $finalRowOffset = 0; # not used yet; highest row number from links table to process

		$overwriteLinksTable = !$this->hasOption( 'keep-links-table' );
		$noKeys = $this->noKeysRequested();
		$this->logPerformance = $this->hasOption( 'logperformance' );
		$perfLogFilename = $this->getOption( 'perfLogFilename', "convLinksPerf.txt" );

		# --------------------------------------------------------------------

		list( $cur, $links, $links_temp, $links_backup ) =
			$dbw->tableNamesN( 'cur', 'links', 'links_temp', 'links_backup' );

		if ( $dbw->tableExists( 'pagelinks' ) ) {
			$this->output( "...have pagelinks; skipping old links table updates\n" );
			return;
		}

		// An integer l_from column means the table already uses the new schema.
		$res = $dbw->query( "SELECT l_from FROM $links LIMIT 1", __METHOD__ );
		$alreadyConverted = ( $dbw->fieldType( $res, 0 ) == "int" );
		$dbw->freeResult( $res );
		if ( $alreadyConverted ) {
			$this->output( "Schema already converted\n" );
			return;
		}

		$res = $dbw->query( "SELECT COUNT(*) AS count FROM $links", __METHOD__ );
		$row = $dbw->fetchObject( $res );
		$numRows = $row->count;
		$dbw->freeResult( $res );

		if ( $numRows == 0 ) {
			$this->output( "Updating schema (no rows to convert)...\n" );
			$this->createTempTable();
		} else {
			$fh = false;
			if ( $this->logPerformance ) {
				$fh = fopen( $perfLogFilename, "w" );
				if ( !$fh ) {
					// Carry on without the log rather than aborting the conversion.
					$this->error( "Couldn't open $perfLogFilename" );
					$this->logPerformance = false;
				}
			}
			$baseTime = $startTime = $this->getMicroTime();

			# Create a title -> cur_id map
			$this->output( "Loading IDs from $cur table...\n" );
			$this->performanceLog( $fh, "Reading $numRows rows from cur table...\n" );
			$this->performanceLog( $fh, "rows read vs seconds elapsed:\n" );

			// Result buffering is switched off for this one query and restored afterwards.
			$dbw->bufferResults( false );
			$res = $dbw->query( "SELECT cur_namespace,cur_title,cur_id FROM $cur", __METHOD__ );
			$ids = array();

			foreach ( $res as $row ) {
				$title = $row->cur_title;
				if ( $row->cur_namespace ) {
					// Non-main namespaces are keyed as "Namespace:Title".
					$title = $wgContLang->getNsText( $row->cur_namespace ) . ":$title";
				}
				$ids[$title] = $row->cur_id;
				$curRowsRead++;
				if ( $reportCurReadProgress && ( $curRowsRead % $curReadReportInterval ) == 0 ) {
					$this->performanceLog(
						$fh,
						$curRowsRead . " " . ( $this->getMicroTime() - $baseTime ) . "\n"
					);
					$this->output( "\t$curRowsRead rows of $cur table read.\n" );
				}
			}
			$dbw->freeResult( $res );
			$dbw->bufferResults( true );
			$this->output( "Finished loading IDs.\n\n" );
			$this->performanceLog(
				$fh,
				"Took " . ( $this->getMicroTime() - $baseTime ) . " seconds to load IDs.\n\n"
			);

			# --------------------------------------------------------------------

			# Now, step through the links table (in chunks of $linksConvInsertInterval rows),
			# convert, and write to the new table.
			$this->createTempTable();
			$this->performanceLog( $fh, "Resetting timer.\n\n" );
			$baseTime = $this->getMicroTime();
			$this->output( "Processing $numRows rows from $links table...\n" );
			$this->performanceLog( $fh, "Processing $numRows rows from $links table...\n" );
			$this->performanceLog( $fh, "rows inserted vs seconds elapsed:\n" );

			// With keys on links_temp, IGNORE lets duplicate (l_from,l_to) pairs be skipped.
			$insertPrefix = $noKeys
				? "INSERT INTO $links_temp (l_from,l_to) VALUES "
				: "INSERT IGNORE INTO $links_temp (l_from,l_to) VALUES ";

			// NOTE(review): the chunks are paged with LIMIT/OFFSET and no ORDER BY,
			// as in the original; confirm the row order is stable enough for that.
			for ( $rowOffset = $initialRowOffset; $rowOffset < $numRows; $rowOffset += $linksConvInsertInterval ) {
				$sqlRead = $dbw->limitResult( "SELECT * FROM $links ", $linksConvInsertInterval, $rowOffset );
				$res = $dbw->query( $sqlRead, __METHOD__ );

				$tuples = array(); # "(from,to)" value groups for this chunk's INSERT
				foreach ( $res as $row ) {
					$fromTitle = $row->l_from;
					if ( array_key_exists( $fromTitle, $ids ) ) { # valid title
						$from = $ids[$fromTitle];
						$to = $row->l_to;
						$tuples[] = "($from,$to)";
					} else { # invalid title
						$numBadLinks++;
					}
				}
				$dbw->freeResult( $res );

				$tuplesAdded = count( $tuples );
				if ( $tuplesAdded != 0 ) {
					if ( $reportLinksConvProgress ) {
						$this->output( "Inserting $tuplesAdded tuples into $links_temp..." );
					}
					$dbw->query( $insertPrefix . implode( ",", $tuples ), __METHOD__ );
					$totalTuplesInserted += $tuplesAdded;
					if ( $reportLinksConvProgress ) {
						$this->output( " done. Total $totalTuplesInserted tuples inserted.\n" );
						$this->performanceLog(
							$fh,
							$totalTuplesInserted . " " . ( $this->getMicroTime() - $baseTime ) . "\n"
						);
					}
				}
			}
			$this->output(
				"$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n\n"
			);
			$this->performanceLog(
				$fh,
				"$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n"
			);
			$this->performanceLog(
				$fh,
				"Total execution time: " . ( $this->getMicroTime() - $startTime ) . " seconds.\n"
			);
			if ( $this->logPerformance && $fh ) {
				fclose( $fh );
			}
		}
		# --------------------------------------------------------------------

		if ( $overwriteLinksTable ) {
			# Check for existing links_backup, and delete it if it exists.
			$this->output( "Dropping backup links table if it exists..." );
			$dbw->query( "DROP TABLE IF EXISTS $links_backup", __METHOD__ );
			$this->output( " done.\n" );

			# Swap in the new table, and move old links table to links_backup
			$this->output( "Swapping tables '$links' to '$links_backup'; '$links_temp' to '$links'..." );
			// Use the resolved table name for the source too, so the rename targets
			// the same table the rest of the script has been reading.
			$dbw->query( "RENAME TABLE $links TO $links_backup, $links_temp TO $links", __METHOD__ );
			$this->output( " done.\n\n" );

			$this->output( "Conversion complete. The old table remains at $links_backup;\n" );
			$this->output( "delete at your leisure.\n" );
		} else {
			$this->output( "Conversion complete. The converted table is at $links_temp;\n" );
			$this->output( "the original links table is unchanged.\n" );
		}
	}

	/**
	 * Whether the new table should be created without keys.
	 *
	 * The option is documented as --nokeys; the historical --noKeys spelling
	 * that the script used to test for is still honoured.
	 *
	 * @return bool
	 */
	private function noKeysRequested() {
		return $this->hasOption( 'nokeys' ) || $this->hasOption( 'noKeys' );
	}

	/**
	 * Drop any existing links_temp table and create an empty one, with or
	 * without keys depending on --nokeys.
	 *
	 * Exits with status 1 if the database connection is not open, because
	 * every later step needs the table this method creates.
	 */
	private function createTempTable() {
		$dbConn = wfGetDB( DB_MASTER );

		if ( !$dbConn->isOpen() ) {
			$this->error( "Opening connection to database failed.", 1 );
			return;
		}
		$links_temp = $dbConn->tableName( 'links_temp' );

		$this->output( "Dropping temporary links table if it exists..." );
		$dbConn->query( "DROP TABLE IF EXISTS $links_temp", __METHOD__ );
		$this->output( " done.\n" );

		$this->output( "Creating temporary links table..." );
		if ( $this->noKeysRequested() ) {
			$dbConn->query( "CREATE TABLE $links_temp ( " .
				"l_from int(8) unsigned NOT NULL default '0', " .
				"l_to int(8) unsigned NOT NULL default '0')", __METHOD__ );
		} else {
			$dbConn->query( "CREATE TABLE $links_temp ( " .
				"l_from int(8) unsigned NOT NULL default '0', " .
				"l_to int(8) unsigned NOT NULL default '0', " .
				"UNIQUE KEY l_from(l_from,l_to), " .
				"KEY (l_to))", __METHOD__ );
		}
		$this->output( " done.\n\n" );
	}

	/**
	 * Write a line to the performance log; does nothing when logging is off
	 * or no log file is open.
	 *
	 * @param resource|bool $fh Log file handle from fopen(), or false
	 * @param string $text Text to write
	 */
	private function performanceLog( $fh, $text ) {
		if ( $this->logPerformance && $fh ) {
			fwrite( $fh, $text );
		}
	}

	/**
	 * @return float Current Unix time in seconds, including the fractional part
	 */
	private function getMicroTime() {
		return microtime( true );
	}
}
259 
// Tell the maintenance framework which class implements this script, then
// include the runner. RUN_MAINTENANCE_IF_MAIN is not defined in this file;
// presumably it comes from Maintenance.php -- confirm.
$maintClass = 'ConvertLinks';
require_once( RUN_MAINTENANCE_IF_MAIN );
DB_MASTER
const DB_MASTER
Definition: Defines.php:56
of
globals txt Globals are evil The original MediaWiki code relied on globals for processing context far too often MediaWiki development since then has been a story of slowly moving context out of global variables and into objects Storing processing context in object member variables allows those objects to be reused in a much more flexible way Consider the elegance of
Definition: globals.txt:10
php
skin txt MediaWiki includes four core it has been set as the default in MediaWiki since the replacing Monobook it had been been the default skin since before being replaced by Vector largely rewritten in while keeping its appearance Several legacy skins were removed in the as the burden of supporting them became too heavy to bear Those in etc for skin dependent CSS etc for skin dependent JavaScript These can also be customised on a per user by etc This feature has led to a wide variety of user styles becoming that gallery is a good place to ending in php
Definition: skin.txt:62
wfGetDB
& wfGetDB( $db, $groups=array(), $wiki=false)
Get a Database object.
Definition: GlobalFunctions.php:3650
$from
$from
Definition: importImages.php:90
RUN_MAINTENANCE_IF_MAIN
require_once RUN_MAINTENANCE_IF_MAIN
Definition: maintenance.txt:50
Maintenance
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
Definition: maintenance.txt:39
$wgContLang
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the content language as $wgContLang
Definition: design.txt:56
conversion
This document describes the state of Postgres support in and is fairly well maintained The main code is very well while extensions are very hit and miss it is probably the most supported database after MySQL Much of the work in making MediaWiki database agnostic came about through the work of creating Postgres as and are nearing end of but without copying over all the usage comments General notes on the conversion
Definition: postgres.txt:22
title
to move a page</td >< td > &*You are moving the page across *A non empty talk page already exists under the new or *You uncheck the box below In those you will have to move or merge the page manually if desired</td >< td > be sure to &You are responsible for making sure that links continue to point where they are supposed to go Note that the page will &a page at the new title
Definition: All_system_messages.txt:2703
table
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global then executing the whole list after the page is displayed We don t do anything smart like collating updates to the same table or such because the list is almost always going to have just one item on if so it s not worth the trouble Since there is a job queue in the jobs table
Definition: deferred.txt:11
array
the array() calling protocol came about after MediaWiki 1.4rc1.
List of Api Query prop modules.
global
when a variable name is used in a it is silently declared as a new masking the global
Definition: design.txt:93
list
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
Maintenance\DB_ADMIN
const DB_ADMIN
Definition: Maintenance.php:59
$title
presenting them properly to the user as errors is done by the caller $title
Definition: hooks.txt:1324
etc
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add etc
Definition: design.txt:12
in
Prior to maintenance scripts were a hodgepodge of code that had no cohesion or formal method of action Beginning in
Definition: maintenance.txt:1
Maintenance\addArg
addArg( $arg, $description, $required=true)
Add some args that are needed.
Definition: Maintenance.php:207
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
Maintenance\error
error( $err, $die=0)
Throw an error to the user.
Definition: Maintenance.php:333
Maintenance\output
output( $out, $channel=null)
Throw some output to the user.
Definition: Maintenance.php:314
from
Please log in again after you receive it</td >< td > s a saved copy from
Definition: All_system_messages.txt:3297
Maintenance\hasOption
hasOption( $name)
Checks to see if a particular param exists.
Definition: Maintenance.php:181
Maintenance\getArg
getArg( $argId=0, $default=null)
Get an argument.
Definition: Maintenance.php:246
$res
$res
Definition: database.txt:21
$type
$type
Definition: testCompression.php:46