25 require __DIR__ .
'/../commandLine.inc';
27 if ( count(
$args ) < 1 ) {
28 echo
"Usage: php trackBlobs.php <cluster> [... <cluster>]\n";
29 echo
"Adds blobs from a given ES cluster to the blob_tracking table\n";
30 echo
"Automatically deletes the tracking table and starts from the start again when restarted.\n";
48 if ( extension_loaded(
'gmp' ) ) {
49 $this->doBlobOrphans =
true;
51 $this->trackedBlobs[$cluster] = gmp_init( 0 );
54 echo
"Warning: the gmp extension is needed to find orphan blobs\n";
63 if ( $this->doBlobOrphans ) {
69 echo
"Doing integrity check...\n";
74 $exists =
$dbr->selectField(
'text', 1,
75 'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\' ' .
76 'AND LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'',
81 echo
"Integrity check failed: found HistoryBlobStub objects in your text table.\n" .
82 "This script could destroy these objects if it continued. Run resolveStubs.php\n" .
88 $flags =
$dbr->selectField(
'archive',
'ar_flags',
89 'ar_flags LIKE \'%external%\' OR (' .
90 'ar_flags LIKE \'%object%\' ' .
91 'AND LOWER(CONVERT(LEFT(ar_text,22) USING latin1)) = \'o:15:"historyblobstub"\' )',
95 if ( strpos(
$flags,
'external' ) !==
false ) {
96 echo
"Integrity check failed: found external storage pointers in your archive table.\n" .
97 "Run normaliseArchiveTable.php to fix this.\n";
100 echo
"Integrity check failed: found HistoryBlobStub objects in your archive table.\n" .
101 "These objects are probably already broken, continuing would make them\n" .
102 "unrecoverable. Run \"normaliseArchiveTable.php --fix-cgz-bug\" to fix this.\n";
106 echo
"Integrity check OK\n";
111 if ( $dbw->tableExists(
'blob_tracking' ) ) {
112 $dbw->query(
'DROP TABLE ' . $dbw->tableName(
'blob_tracking' ) );
113 $dbw->query(
'DROP TABLE ' . $dbw->tableName(
'blob_orphans' ) );
115 $dbw->sourceFile( __DIR__ .
'/blob_tracking.sql' );
119 if ( !$this->textClause ) {
121 $this->textClause =
'';
122 foreach ( $this->clusters
as $cluster ) {
123 if ( $this->textClause !=
'' ) {
124 $this->textClause .=
' OR ';
126 $this->textClause .=
'old_text' .
$dbr->buildLike(
"DB://$cluster/",
$dbr->anyString() );
134 if ( !preg_match(
'!^DB://(\w+)/(\d+)(?:/([0-9a-fA-F]+)|)$!', $text, $m ) ) {
140 'id' => intval( $m[2] ),
141 'hash' => isset( $m[3] ) ? $m[3] : null
154 $endId =
$dbr->selectField(
'revision',
'MAX(rev_id)',
false, __METHOD__ );
158 echo
"Finding revisions...\n";
161 $res =
$dbr->select( [
'revision',
'text' ],
162 [
'rev_id',
'rev_page',
'old_id',
'old_flags',
'old_text' ],
164 'rev_id > ' .
$dbr->addQuotes( $startId ),
165 'rev_text_id=old_id',
167 'old_flags ' .
$dbr->buildLike(
$dbr->anyString(),
'external',
$dbr->anyString() ),
171 'ORDER BY' =>
'rev_id',
172 'LIMIT' => $this->batchSize
175 if ( !
$res->numRows() ) {
180 foreach (
$res as $row ) {
181 $startId = $row->rev_id;
184 echo
"Invalid DB:// URL in rev_id {$row->rev_id}\n";
187 if ( !in_array( $info[
'cluster'], $this->clusters ) ) {
188 echo
"Invalid cluster returned in SQL query: {$info['cluster']}\n";
192 'bt_page' => $row->rev_page,
193 'bt_rev_id' => $row->rev_id,
194 'bt_text_id' => $row->old_id,
195 'bt_cluster' => $info[
'cluster'],
196 'bt_blob_id' => $info[
'id'],
197 'bt_cgz_hash' => $info[
'hash']
199 if ( $this->doBlobOrphans ) {
200 gmp_setbit( $this->trackedBlobs[$info[
'cluster']], $info[
'id'] );
203 $dbw->insert(
'blob_tracking', $insertBatch, __METHOD__ );
204 $rowsInserted += count( $insertBatch );
207 if ( $batchesDone >= $this->reportingInterval ) {
209 echo
"$startId / $endId\n";
213 echo
"Found $rowsInserted revisions\n";
222 # Wait until the blob_tracking table is available in the replica DB
225 $pos = $dbw->getMasterPos();
226 $dbr->masterPosWait( $pos, 100000 );
230 $endId =
$dbr->selectField(
'text',
'MAX(old_id)',
false, __METHOD__ );
234 echo
"Finding orphan text...\n";
236 # Scan the text table for orphan text
238 $res =
$dbr->select( [
'text',
'blob_tracking' ],
239 [
'old_id',
'old_flags',
'old_text' ],
241 'old_id>' .
$dbr->addQuotes( $startId ),
243 'old_flags ' .
$dbr->buildLike(
$dbr->anyString(),
'external',
$dbr->anyString() ),
248 'ORDER BY' =>
'old_id',
249 'LIMIT' => $this->batchSize
251 [
'blob_tracking' => [
'LEFT JOIN',
'bt_text_id=old_id' ] ]
254 foreach (
$res as $row ) {
255 $ids[] = $row->old_id;
258 if ( !
$res->numRows() ) {
263 foreach (
$res as $row ) {
264 $startId = $row->old_id;
267 echo
"Invalid DB:// URL in old_id {$row->old_id}\n";
270 if ( !in_array( $info[
'cluster'], $this->clusters ) ) {
271 echo
"Invalid cluster returned in SQL query\n";
278 'bt_text_id' => $row->old_id,
279 'bt_cluster' => $info[
'cluster'],
280 'bt_blob_id' => $info[
'id'],
281 'bt_cgz_hash' => $info[
'hash']
283 if ( $this->doBlobOrphans ) {
284 gmp_setbit( $this->trackedBlobs[$info[
'cluster']], $info[
'id'] );
287 $dbw->insert(
'blob_tracking', $insertBatch, __METHOD__ );
289 $rowsInserted += count( $insertBatch );
291 if ( $batchesDone >= $this->reportingInterval ) {
293 echo
"$startId / $endId\n";
297 echo
"Found $rowsInserted orphan text rows\n";
308 if ( !extension_loaded(
'gmp' ) ) {
309 echo
"Can't find orphan blobs, need bitfield support provided by GMP.\n";
316 foreach ( $this->clusters
as $cluster ) {
317 echo
"Searching for orphan blobs in $cluster...\n";
322 if ( strpos( $e->error,
'Unknown database' ) !==
false ) {
323 echo
"No database on $cluster\n";
325 echo
"Error on $cluster: " . $e->getMessage() .
"\n";
329 $table = $extDB->getLBInfo(
'blobs table' );
330 if ( is_null( $table ) ) {
333 if ( !$extDB->tableExists( $table ) ) {
334 echo
"No blobs table on cluster $cluster\n";
339 $actualBlobs = gmp_init( 0 );
340 $endId = $extDB->selectField( $table,
'MAX(blob_id)',
false, __METHOD__ );
344 $res = $extDB->select( $table,
346 [
'blob_id > ' . $extDB->addQuotes( $startId ) ],
351 if ( !
$res->numRows() ) {
355 foreach (
$res as $row ) {
356 gmp_setbit( $actualBlobs, $row->blob_id );
358 $startId = $row->blob_id;
361 if ( $batchesDone >= $this->reportingInterval ) {
363 echo
"$startId / $endId\n";
369 $orphans = gmp_and( $actualBlobs, gmp_com( $this->trackedBlobs[$cluster] ) );
376 $id = gmp_scan1( $orphans, $id );
381 'bo_cluster' => $cluster,
384 if ( count( $insertBatch ) > $this->batchSize ) {
385 $dbw->insert(
'blob_orphans', $insertBatch, __METHOD__ );
392 if ( $insertBatch ) {
393 $dbw->insert(
'blob_orphans', $insertBatch, __METHOD__ );
395 echo
"Found $numOrphans orphan(s) in $cluster\n";
wfGetDB($db, $groups=[], $wiki=false)
Get a Database object.
wfWaitForSlaves($ifWritesSince=null, $wiki=false, $cluster=false, $timeout=null)
Waits for the replica DBs to catch up to the master position.
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException'returning false will NOT prevent logging $e
it s the revision text itself In either if gzip is the revision text is gzipped $flags
trackRevisions()
Scan the revision table for rows stored in the specified clusters.
trackOrphanText()
Scan the text table for orphan text Orphan text here does not imply DB corruption – deleted text tra...
if(count($args)< 1) $tracker
findOrphanBlobs()
Scan the blobs table for rows not registered in blob_tracking (and thus not registered in the text ta...
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
wfGetLBFactory()
Get the load balancer factory object.