MediaWiki  master
migrateArchiveText.php
Go to the documentation of this file.
1 <?php
24 require_once __DIR__ . '/Maintenance.php';
25 
/**
 * Set up the script: run the parent constructor, then register the
 * description text and the 'replace-missing' option.
 */
public function __construct() {
	parent::__construct();

	$summary = 'Migrates content from pre-1.5 ar_text and ar_flags columns to text storage';
	$this->addDescription( $summary );

	// Help text shown for the 'replace-missing' option.
	$replaceMissingHelp =
		"For rows with missing or unloadable data, throw away whatever is there and\n"
		. "mark them as \"error\" in the database.";
	$this->addOption( 'replace-missing', $replaceMissingHelp );
}
44 
/**
 * Sets whether a run of this maintenance script has the force parameter set.
 *
 * @param bool $forced Value stored under the 'force' key of the option list
 */
public function setForce( $forced = true ) {
	$this->mOptions['force'] = $forced;
}
52 
/**
 * Key identifying this update; the name of this class is used.
 *
 * @return string
 */
protected function getUpdateKey() {
	return self::class;
}
56 
/**
 * Move archive text that is still stored inline (ar_text / ar_flags) into the
 * text table, batch by batch.
 *
 * Rows whose ar_text_id is NULL are read from the replica in ar_id order. Each
 * one is written on the master in its own transaction: a text row is inserted
 * and the archive row is pointed at it via ar_text_id, with ar_text and
 * ar_flags blanked out.
 *
 * @return bool True when no row had to be skipped because its data was missing
 */
protected function doDBUpdates() {
	$overwriteUnreadable = $this->hasOption( 'replace-missing' );
	$externalStoreTarget = $this->getConfig()->get( 'DefaultExternalStore' );
	$limit = $this->getBatchSize();

	$replica = $this->getDB( DB_REPLICA, [ 'vslow' ] );
	$master = $this->getDB( DB_MASTER );

	// Both connections have to report the legacy column; otherwise there is nothing to do.
	$hasLegacyColumn = $replica->fieldExists( 'archive', 'ar_text', __METHOD__ )
		&& $master->fieldExists( 'archive', 'ar_text', __METHOD__ );
	if ( !$hasLegacyColumn ) {
		$this->output( "No ar_text field, so nothing to migrate.\n" );
		return true;
	}

	$this->output( "Migrating ar_text to modern storage...\n" );
	$lastId = 0;
	$migrated = 0;
	$skipped = 0;

	do {
		$batch = $replica->select(
			'archive',
			[ 'ar_id', 'ar_text', 'ar_flags' ],
			[ 'ar_text_id' => null, "ar_id > $lastId" ],
			__METHOD__,
			[ 'LIMIT' => $limit, 'ORDER BY' => [ 'ar_id' ] ]
		);
		$fetched = $batch->numRows();

		foreach ( $batch as $row ) {
			$lastId = $row->ar_id;

			$alreadyExternal = in_array( 'external', explode( ',', $row->ar_flags ), true );
			if ( $alreadyExternal ) {
				// Text already lives in external storage: carry the row over untouched.
				$text = $row->ar_text;
				$textFlags = $row->ar_flags;
			} else {
				$text = Revision::getRevisionText( $row, 'ar_' );

				if ( $text === false ) {
					if ( !$overwriteUnreadable ) {
						// Leave the row alone and report it in the final summary.
						$this->error( "No data for row ar_id=$row->ar_id" );
						$skipped++;
						continue;
					}

					$this->error( "Replacing missing data for row ar_id=$row->ar_id" );
					$text = 'Missing data in migrateArchiveText.php on ' . date( 'c' );
					$textFlags = 'error';
				} else {
					// compressRevisionText() takes the text by reference; its
					// return value is used as the flag list for the new row.
					$textFlags = Revision::compressRevisionText( $text );

					if ( $externalStoreTarget ) {
						$text = ExternalStore::insertToDefault( $text );
						$separator = $textFlags ? ',' : '';
						$textFlags .= $separator . 'external';
					}
				}
			}

			$this->beginTransaction( $master, __METHOD__ );
			$master->insert(
				'text',
				[ 'old_text' => $text, 'old_flags' => $textFlags ],
				__METHOD__
			);
			$textId = $master->insertId();
			// The ar_text_id IS NULL condition keeps an already-migrated row from being re-pointed.
			$master->update(
				'archive',
				[ 'ar_text_id' => $textId, 'ar_text' => '', 'ar_flags' => '' ],
				[ 'ar_id' => $row->ar_id, 'ar_text_id' => null ],
				__METHOD__
			);
			$migrated += $master->affectedRows();
			$this->commitTransaction( $master, __METHOD__ );
		}

		// A short batch means the table has been exhausted.
		$reachedEnd = $fetched < $limit;
		if ( !$reachedEnd ) {
			// No explicit replication wait here: commitTransaction() has already waited.
			$this->output( "... $lastId\n" );
		}
	} while ( !$reachedEnd );

	$this->output( "Completed ar_text migration, $migrated rows updated, $skipped missing data.\n" );
	if ( $skipped ) {
		$this->output( "Run with --replace-missing to overwrite missing data with an error message.\n" );
	}

	return $skipped === 0;
}
152 }
153 
154 $maintClass = MigrateArchiveText::class;
155 require_once RUN_MAINTENANCE_IF_MAIN;
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
const RUN_MAINTENANCE_IF_MAIN
Definition: Maintenance.php:39
error( $err, $die=0)
Throw an error to the user.
hasOption( $name)
Checks to see if a particular option exists.
static insertToDefault( $data, array $params=[])
Like insert() above, but does more of the work for us.
const DB_MASTER
Definition: defines.php:26
static getRevisionText( $row, $prefix='old_', $wiki=false)
Get revision text associated with an old or archive row.
Definition: Revision.php:850
$last
addDescription( $text)
Set the description text.
static compressRevisionText(&$text)
If $wgCompressRevisions is enabled, we will compress data.
Definition: Revision.php:926
output( $out, $channel=null)
Throw some output to the user.
Maintenance script that migrates archive.ar_text and ar_flags to text storage.
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
setForce( $forced=true)
Sets whether a run of this maintenance script has the force parameter set.
getBatchSize()
Returns batch size.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
const DB_REPLICA
Definition: defines.php:25
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.