MediaWiki  master
upgradeLogging.php
Go to the documentation of this file.
1 <?php
24 require __DIR__ . '/../commandLine.inc';
25 
28 
36 
/**
 * Maintenance script that upgrades the logging table to the schema with the
 * log_id and log_deleted fields, copying rows in batches.
 */
class UpdateLogging {

	/** @var \Wikimedia\Rdbms\Database Master connection, assigned in execute() */
	public $dbw;

	/** @var int Number of rows per copy batch */
	public $batchSize = 1000;

	/** @var string|bool NOTE(review): not read by any method in this class — confirm before removing */
	public $minTs = false;
43 
/**
 * Run the upgrade.
 *
 * Creates the new-schema table `logging_1_10` if needed, copies every row
 * of `logging` into it, renames `logging` to `logging_pre_1_10`, copies any
 * rows that arrived in the meantime, and finally renames `logging_1_10`
 * to `logging`.
 *
 * The presence of `logging_pre_1_10` is used as the state marker: together
 * with a missing `logging` it means a previous run aborted after the first
 * rename; together with an existing `logging` it means a previous run
 * completed.
 */
public function execute() {
	$this->dbw = wfGetDB( DB_MASTER );
	$logging = $this->dbw->tableName( 'logging' );
	$logging_1_10 = $this->dbw->tableName( 'logging_1_10' );
	$logging_pre_1_10 = $this->dbw->tableName( 'logging_pre_1_10' );

	if ( $this->dbw->tableExists( 'logging_pre_1_10', __METHOD__ )
		&& !$this->dbw->tableExists( 'logging', __METHOD__ )
	) {
		# Fix previous aborted run: put the old table back under its
		# original name so the normal flow below can start over.
		echo "Cleaning up from previous aborted run\n";
		$this->dbw->query( "RENAME TABLE $logging_pre_1_10 TO $logging", __METHOD__ );
	}

	if ( $this->dbw->tableExists( 'logging_pre_1_10', __METHOD__ ) ) {
		echo "This script has already been run to completion\n";

		return;
	}

	# Create the target table
	if ( !$this->dbw->tableExists( 'logging_1_10', __METHOD__ ) ) {
		global $wgDBTableOptions;

		# The statement must be closed with ") $wgDBTableOptions"; without
		# it the CREATE TABLE is syntactically incomplete and the configured
		# table options are never applied.
		$sql = <<<EOT
CREATE TABLE $logging_1_10 (
	-- Log ID, for referring to this specific log entry, probably for deletion and such.
	log_id int unsigned NOT NULL auto_increment,

	-- Symbolic keys for the general log type and the action type
	-- within the log. The output format will be controlled by the
	-- action field, but only the type controls categorization.
	log_type varbinary(10) NOT NULL default '',
	log_action varbinary(10) NOT NULL default '',

	-- Timestamp. Duh.
	log_timestamp binary(14) NOT NULL default '19700101000000',

	-- The user who performed this action; key to user_id
	log_user int unsigned NOT NULL default 0,

	-- Key to the page affected. Where a user is the target,
	-- this will point to the user page.
	log_namespace int NOT NULL default 0,
	log_title varchar(255) binary NOT NULL default '',

	-- Freeform text. Interpreted as edit history comments.
	log_comment varchar(255) NOT NULL default '',

	-- LF separated list of miscellaneous parameters
	log_params blob NOT NULL,

	-- rev_deleted for logs
	log_deleted tinyint unsigned NOT NULL default '0',

	PRIMARY KEY log_id (log_id),
	KEY type_time (log_type, log_timestamp),
	KEY user_time (log_user, log_timestamp),
	KEY page_time (log_namespace, log_title, log_timestamp),
	KEY times (log_timestamp)

) $wgDBTableOptions
EOT;
		echo "Creating table logging_1_10\n";
		$this->dbw->query( $sql, __METHOD__ );
	}

	# Synchronise the tables
	echo "Doing initial sync...\n";
	$this->sync( 'logging', 'logging_1_10' );
	echo "Sync done\n\n";

	# Rename the old table away
	echo "Renaming the old table to $logging_pre_1_10\n";
	$this->dbw->query( "RENAME TABLE $logging TO $logging_pre_1_10", __METHOD__ );

	# Copy remaining old rows
	# Done before the new table is active so that $copyPos is accurate
	echo "Doing final sync...\n";
	$this->sync( 'logging_pre_1_10', 'logging_1_10' );

	# Move the new table in
	echo "Moving the new table in...\n";
	$this->dbw->query( "RENAME TABLE $logging_1_10 TO $logging", __METHOD__ );
	echo "Finished.\n";
}
130 
/**
 * Copy all rows from $srcTable to $dstTable.
 *
 * Rows are copied in ascending log_timestamp order, $this->batchSize rows
 * at a time. The resume position is the highest log_timestamp already in
 * the destination, so the method can be re-run after an interruption.
 * After each batch it waits for replication before continuing.
 *
 * @param string $srcTable Unprefixed name of the table to read from
 * @param string $dstTable Unprefixed name of the table to write to
 */
private function sync( $srcTable, $dstTable ) {
	$minTs = $this->dbw->selectField( $srcTable, 'MIN(log_timestamp)', '', __METHOD__ );
	$minTsUnix = wfTimestamp( TS_UNIX, $minTs );
	$numRowsCopied = 0;
	# Fully qualified so the call resolves whether or not the file imports the class
	$lbFactory = \MediaWiki\MediaWikiServices::getInstance()->getDBLoadBalancerFactory();

	while ( true ) {
		$maxTs = $this->dbw->selectField( $srcTable, 'MAX(log_timestamp)', '', __METHOD__ );
		$copyPos = $this->dbw->selectField( $dstTable, 'MAX(log_timestamp)', '', __METHOD__ );
		$maxTsUnix = wfTimestamp( TS_UNIX, $maxTs );
		$copyPosUnix = wfTimestamp( TS_UNIX, $copyPos );

		# Progress is the copy position's place within the source's
		# timestamp range. The range is zero when every source row shares
		# one timestamp; dividing by it would raise DivisionByZeroError.
		$tsRange = (int)$maxTsUnix - (int)$minTsUnix;
		if ( $copyPos === null ) {
			$percent = 0;
		} elseif ( $tsRange === 0 ) {
			$percent = 100;
		} else {
			$percent = ( (int)$copyPosUnix - (int)$minTsUnix ) / $tsRange * 100;
		}
		printf( "%s %.2f%%\n", $copyPos, $percent );

		# Handle all entries with timestamp equal to $copyPos: the previous
		# batch may have stopped part-way through a group of rows sharing
		# that timestamp, and the "> $copyPos" query below would skip them.
		if ( $copyPos !== null ) {
			$numRowsCopied += $this->copyExactMatch( $srcTable, $dstTable, $copyPos );
		}

		# Now copy a batch of rows
		if ( $copyPos === null ) {
			# Destination is empty: no lower bound. '' is the same
			# "no condition" value passed to selectField() above.
			$conds = '';
		} else {
			$conds = [ 'log_timestamp > ' . $this->dbw->addQuotes( $copyPos ) ];
		}
		$srcRes = $this->dbw->select( $srcTable, '*', $conds, __METHOD__,
			[ 'LIMIT' => $this->batchSize, 'ORDER BY' => 'log_timestamp' ] );

		if ( !$srcRes->numRows() ) {
			# All done
			break;
		}

		$batch = [];
		foreach ( $srcRes as $srcRow ) {
			$batch[] = (array)$srcRow;
		}
		$this->dbw->insert( $dstTable, $batch, __METHOD__ );
		$numRowsCopied += count( $batch );

		$lbFactory->waitForReplication();
	}
	echo "Copied $numRowsCopied rows\n";
}
186 
/**
 * Copy the rows of $srcTable whose log_timestamp equals $copyPos and that
 * are not yet present in $dstTable.
 *
 * Presence is decided by comparing an md5 of the serialized row, restricted
 * to the fields that exist in the source row.
 *
 * @param string $srcTable Table to read from
 * @param string $dstTable Table to insert into
 * @param string $copyPos Timestamp value to match exactly
 * @return int Number of rows inserted into $dstTable
 */
private function copyExactMatch( $srcTable, $dstTable, $copyPos ) {
	$tsCond = [ 'log_timestamp' => $copyPos ];
	$sourceRows = $this->dbw->select( $srcTable, '*', $tsCond, __METHOD__ );
	$targetRows = $this->dbw->select( $dstTable, '*', $tsCond, __METHOD__ );

	# Nothing in the source at this timestamp: nothing to copy
	if ( !$sourceRows->numRows() ) {
		return 0;
	}

	# Peek at the first source row to learn the source's field list,
	# then rewind so the copy loop below sees every row.
	$firstRow = $sourceRows->fetchObject();
	$fieldNames = array_keys( (array)$firstRow );
	$sourceRows->seek( 0 );

	# Index the destination rows by hash, using only the source's fields
	$alreadyCopied = [];
	foreach ( $targetRows as $targetRow ) {
		$projected = [];
		foreach ( $fieldNames as $name ) {
			$projected[$name] = $targetRow->$name;
		}
		$alreadyCopied[md5( serialize( $projected ) )] = true;
	}

	# Insert each source row whose hash was not seen in the destination
	$inserted = 0;
	foreach ( $sourceRows as $sourceRow ) {
		$rowData = (array)$sourceRow;
		if ( isset( $alreadyCopied[md5( serialize( $rowData ) )] ) ) {
			continue;
		}
		$this->dbw->insert( $dstTable, $rowData, __METHOD__ );
		$inserted++;
	}

	return $inserted;
}
220 }
221 
// Instantiate the maintenance class before running it; $ul is otherwise undefined here.
$ul = new UpdateLogging();
$ul->execute();
Wikimedia\Rdbms\Database
Relational database abstraction object.
Definition: Database.php:50
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:154
wfTimestamp
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Definition: GlobalFunctions.php:1808
UpdateLogging\$batchSize
$batchSize
Definition: upgradeLogging.php:41
serialize
serialize()
Definition: ApiMessageTrait.php:138
UpdateLogging\$minTs
$minTs
Definition: upgradeLogging.php:42
UpdateLogging\execute
execute()
Definition: upgradeLogging.php:44
wfGetDB
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
Definition: GlobalFunctions.php:2467
DB_MASTER
const DB_MASTER
Definition: defines.php:26
$ul
$ul
Definition: upgradeLogging.php:222
$wgDBTableOptions
$wgDBTableOptions
MySQL table options to use during installation or update.
Definition: DefaultSettings.php:2105
UpdateLogging\sync
sync( $srcTable, $dstTable)
Copy all rows from $srcTable to $dstTable.
Definition: upgradeLogging.php:136
UpdateLogging\copyExactMatch
copyExactMatch( $srcTable, $dstTable, $copyPos)
Definition: upgradeLogging.php:187
UpdateLogging\$dbw
Database $dbw
Definition: upgradeLogging.php:40
UpdateLogging
Maintenance script that upgrades the logging table to add the log_id/log_deleted fields in a replication-safe way.
Definition: upgradeLogging.php:35