MediaWiki  1.34.0
migrateArchiveText.php
Go to the documentation of this file.
1 <?php
25 
26 require_once __DIR__ . '/Maintenance.php';
27 
/**
 * Default constructor.
 *
 * Registers the script's description and its only command-line flag,
 * --replace-missing.
 */
public function __construct() {
	parent::__construct();

	$description = 'Migrates content from pre-1.5 ar_text and ar_flags columns to text storage';
	$this->addDescription( $description );

	// Help text shown for the --replace-missing flag.
	$replaceMissingHelp = "For rows with missing or unloadable data, throw away whatever is there and\n"
		. "mark them as \"error\" in the database.";
	$this->addOption( 'replace-missing', $replaceMissingHelp );
}
46 
/**
 * Sets whether a run of this maintenance script has the force parameter set.
 *
 * The value is written straight into the script's option array under the
 * 'force' key.
 *
 * @param bool $forced Value to store for the 'force' option (defaults to true)
 */
public function setForce( $forced = true ) {
	$optionName = 'force';
	$this->mOptions[$optionName] = $forced;
}
54 
/**
 * Get the update key name to go in the update log table.
 *
 * @return string The name of the class that defines this method
 */
protected function getUpdateKey() {
	return self::class;
}
58 
/**
 * Do the actual work: move legacy archive.ar_text / ar_flags content into the
 * text table, one batch at a time.
 *
 * Archive rows whose ar_text_id is NULL are read from a replica in ar_id order.
 * Text not already flagged 'external' is decompressed, recompressed and, when
 * DefaultExternalStore is configured, pushed to external storage. Each row is
 * then written inside its own transaction: a new text row is inserted and the
 * archive row is pointed at it, with ar_text and ar_flags blanked.
 *
 * Rows that cannot be decompressed are skipped and counted as errors, unless
 * --replace-missing was given, in which case a placeholder text row flagged
 * 'error' is stored for them instead.
 *
 * @return bool True if the ar_text column does not exist or no row was skipped
 *  for missing data; false if at least one row was skipped
 */
protected function doDBUpdates() {
	$replaceMissing = $this->hasOption( 'replace-missing' );
	$defaultExternalStore = $this->getConfig()->get( 'DefaultExternalStore' );
	// @phan-suppress-next-line PhanAccessMethodInternal
	$blobStore = MediaWikiServices::getInstance()
		->getBlobStoreFactory()
		->newSqlBlobStore();
	$batchSize = $this->getBatchSize();

	$dbr = $this->getDB( DB_REPLICA, [ 'vslow' ] );
	$dbw = $this->getDB( DB_MASTER );

	// The column has to be present on both the replica and the master.
	$hasLegacyColumn = $dbr->fieldExists( 'archive', 'ar_text', __METHOD__ )
		&& $dbw->fieldExists( 'archive', 'ar_text', __METHOD__ );
	if ( !$hasLegacyColumn ) {
		$this->output( "No ar_text field, so nothing to migrate.\n" );
		return true;
	}

	$this->output( "Migrating ar_text to modern storage...\n" );

	$errors = 0;
	$count = 0;
	$last = 0;

	do {
		// Next batch of unmigrated rows, resuming after the last ar_id seen.
		$batch = $dbr->select(
			'archive',
			[ 'ar_id', 'ar_text', 'ar_flags' ],
			[
				'ar_text_id' => null,
				"ar_id > $last",
			],
			__METHOD__,
			[ 'LIMIT' => $batchSize, 'ORDER BY' => [ 'ar_id' ] ]
		);
		$fetched = $batch->numRows();

		foreach ( $batch as $row ) {
			$last = $row->ar_id;

			$flagList = explode( ',', $row->ar_flags );
			if ( in_array( 'external', $flagList, true ) ) {
				// Already in external storage: carry text and flags over untouched.
				$text = $row->ar_text;
				$textFlags = $row->ar_flags;
			} else {
				$text = $blobStore->decompressData( $row->ar_text, $flagList );

				if ( $text === false ) {
					if ( !$replaceMissing ) {
						$this->error( "No data for row ar_id=$row->ar_id" );
						$errors++;
						continue;
					}

					$this->error( "Replacing missing data for row ar_id=$row->ar_id" );
					$text = 'Missing data in migrateArchiveText.php on ' . date( 'c' );
					$textFlags = 'error';
				} else {
					// Recompress the text; $text is passed by reference to the compressor.
					$textFlags = Revision::compressRevisionText( $text );

					if ( $defaultExternalStore ) {
						// Store externally and append 'external' to the flag list.
						$text = ExternalStore::insertToDefault( $text );
						$textFlags .= ( $textFlags ? ',' : '' ) . 'external';
					}
				}
			}

			$this->beginTransaction( $dbw, __METHOD__ );

			$textRow = [ 'old_text' => $text, 'old_flags' => $textFlags ];
			$dbw->insert( 'text', $textRow, __METHOD__ );
			$textId = $dbw->insertId();

			// Only touch the archive row if it still has no ar_text_id.
			$dbw->update(
				'archive',
				[ 'ar_text_id' => $textId, 'ar_text' => '', 'ar_flags' => '' ],
				[ 'ar_id' => $row->ar_id, 'ar_text_id' => null ],
				__METHOD__
			);
			$count += $dbw->affectedRows();

			$this->commitTransaction( $dbw, __METHOD__ );
		}

		// A batch shorter than the batch size means there is nothing left to fetch.
		$reachedEnd = $fetched < $batchSize;
		if ( !$reachedEnd ) {
			$this->output( "... $last\n" );
			// $this->commitTransaction() already waited for replication; no need to re-wait here
		}
	} while ( !$reachedEnd );

	$this->output( "Completed ar_text migration, $count rows updated, $errors missing data.\n" );
	if ( $errors ) {
		$this->output( "Run with --replace-missing to overwrite missing data with an error message.\n" );
	}

	return $errors === 0;
}
159 }
160 
// NOTE(review): $maintClass presumably tells the maintenance bootstrap which class to run — confirm.
161 $maintClass = MigrateArchiveText::class;
// NOTE(review): RUN_MAINTENANCE_IF_MAIN is a constant defined in Maintenance.php; presumably the
// file it names runs $maintClass when this script is the entry point — confirm.
162 require_once RUN_MAINTENANCE_IF_MAIN;
RUN_MAINTENANCE_IF_MAIN
const RUN_MAINTENANCE_IF_MAIN
Definition: Maintenance.php:39
ExternalStore\insertToDefault
static insertToDefault( $data, array $params=[])
Like insert() above, but does more of the work for us.
Definition: ExternalStore.php:135
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:117
MigrateArchiveText\__construct
__construct()
Default constructor.
Definition: migrateArchiveText.php:35
Maintenance\addDescription
addDescription( $text)
Set the description text.
Definition: Maintenance.php:348
$last
$last
Definition: profileinfo.php:419
MigrateArchiveText
Maintenance script that migrates archive.ar_text and ar_flags to text storage.
Definition: migrateArchiveText.php:34
$res
$res
Definition: testCompression.php:52
$dbr
$dbr
Definition: testCompression.php:50
Maintenance\beginTransaction
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.
Definition: Maintenance.php:1426
MigrateArchiveText\getUpdateKey
getUpdateKey()
Get the update key name to go in the update log table.
Definition: migrateArchiveText.php:55
Maintenance\getConfig
getConfig()
Definition: Maintenance.php:613
LoggedUpdateMaintenance
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
Definition: Maintenance.php:1727
Revision\compressRevisionText
static compressRevisionText(&$text)
If $wgCompressRevisions is enabled, we will compress data.
Definition: Revision.php:926
Maintenance\addOption
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
Definition: Maintenance.php:267
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
DB_MASTER
const DB_MASTER
Definition: defines.php:26
MigrateArchiveText\setForce
setForce( $forced=true)
Sets whether a run of this maintenance script has the force parameter set.
Definition: migrateArchiveText.php:51
Maintenance\commitTransaction
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
Definition: Maintenance.php:1441
Maintenance\getDB
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
Definition: Maintenance.php:1396
Maintenance\getBatchSize
getBatchSize()
Returns batch size.
Definition: Maintenance.php:386
Maintenance\error
error( $err, $die=0)
Throw an error to the user.
Definition: Maintenance.php:481
Maintenance\output
output( $out, $channel=null)
Throw some output to the user.
Definition: Maintenance.php:453
Maintenance\hasOption
hasOption( $name)
Checks to see if a particular option exists.
Definition: Maintenance.php:288
MigrateArchiveText\doDBUpdates
doDBUpdates()
Do the actual work.
Definition: migrateArchiveText.php:59
$maintClass
$maintClass
Definition: migrateArchiveText.php:161