MediaWiki  REL1_33
categoryChangesAsRdf.php
Go to the documentation of this file.
1 <?php
19 use Wikimedia\Purtle\RdfWriter;
20 use Wikimedia\Purtle\TurtleRdfWriter;
22 
23 require_once __DIR__ . '/Maintenance.php';
24 
/**
 * Insert query.
 * sprintf() template; the single %s placeholder receives the RDF text to insert.
 */
35  const SPARQL_INSERT = <<<SPARQL
36 INSERT DATA {
37 %s
38 };
39 
40 SPARQL;
41 
/**
 * Delete query.
 * sprintf() template; the %s placeholder receives the list of category URLs
 * (each wrapped in angle brackets by the callers) whose triples are removed.
 */
45  const SPARQL_DELETE = <<<SPARQLD
46 DELETE {
47 ?category ?x ?y
48 } WHERE {
49  VALUES ?category {
50  %s
51  }
52 };
53 
54 SPARQLD;
55 
/**
 * Delete/Insert query.
 * sprintf() template with two %s placeholders: the first sits in the INSERT
 * clause, the second in the VALUES list of categories.
 * NOTE(review): not referenced by any code visible in this listing.
 */
59  const SPARQL_DELETE_INSERT = <<<SPARQLDI
60 DELETE {
61 ?category ?x ?y
62 } INSERT {
63 %s
64 } WHERE {
65  VALUES ?category {
66  %s
67  }
68 };
69 
70 SPARQLDI;
71 
/** @var RdfWriter Writer that accumulates the RDF produced by this script; set in initialize(). */
75  private $rdfWriter;
/** @var CategoriesRdf Categories RDF helper, constructed around $rdfWriter in initialize(). */
80  private $categoriesRdf;
81 
/** Lower bound (inclusive) of the rc_timestamp window; value of MWTimestamp::getTimestamp() for --start. */
82  private $startTS;
/** Upper bound (exclusive) of the rc_timestamp window; value of MWTimestamp::getTimestamp() for --end. */
83  private $endTS;
84 
/**
 * Processed page IDs, so we don't try to process the same thing twice.
 * Stored as a map of page ID => true and checked with isset().
 */
90  protected $processed = [];
91 
/**
 * Default constructor.
 *
 * Registers the script description, the batch size used by the row
 * iterators, and the three command-line options.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( "Generate RDF dump of category changes in a wiki." );
	$this->setBatchSize( 200 );

	// Optional destination; takes an argument.
	$this->addOption(
		'output',
		"Output file (default is stdout). Will be overwritten.",
		false,
		true,
		'o'
	);
	// Required time window; both options take an argument.
	$this->addOption(
		'start',
		'Starting timestamp (inclusive), in ISO or Mediawiki format.',
		true,
		true,
		's'
	);
	$this->addOption(
		'end',
		'Ending timestamp (exclusive), in ISO or Mediawiki format.',
		true,
		true,
		'e'
	);
}
105 
/**
 * Initialize external service classes.
 *
 * Creates the RDF writer and the categories helper that writes through it.
 */
public function initialize() {
	// SPARQL Update syntax is close to Turtle format, so a Turtle writer is sufficient.
	$writer = new TurtleRdfWriter();

	$this->rdfWriter = $writer;
	$this->categoriesRdf = new CategoriesRdf( $writer );
}
114 
/**
 * Do the actual work.
 *
 * Validates the requested time window against $wgRCMaxAge, opens the output
 * stream, writes the prefix header, then emits the SPARQL updates for each
 * kind of category change and finally the dump timestamp update.
 */
public function execute() {
	global $wgRCMaxAge;

	$this->initialize();
	$startTS = new MWTimestamp( $this->getOption( "start" ) );

	$endTS = new MWTimestamp( $this->getOption( "end" ) );
	$now = new MWTimestamp();

	// These are reported but deliberately do not abort the run.
	if ( $now->getTimestamp() - $startTS->getTimestamp() > $wgRCMaxAge ) {
		$this->error( "Start timestamp too old, maximum RC age is $wgRCMaxAge!" );
	}
	if ( $now->getTimestamp() - $endTS->getTimestamp() > $wgRCMaxAge ) {
		$this->error( "End timestamp too old, maximum RC age is $wgRCMaxAge!" );
	}

	$this->startTS = $startTS->getTimestamp();
	$this->endTS = $endTS->getTimestamp();

	$outFile = $this->getOption( 'output', 'php://stdout' );
	if ( $outFile === '-' ) {
		$outFile = 'php://stdout';
	}

	$output = fopen( $outFile, 'wb' );
	if ( $output === false ) {
		// Without a valid handle every fwrite() below would fail, so stop here.
		$this->fatalError( "Could not open output file $outFile for writing!" );
	}

	$this->categoriesRdf->setupPrefixes();
	$this->rdfWriter->start();

	$prefixes = $this->getRdf();
	// We have to strip @ from prefix, since SPARQL UPDATE doesn't use them
	// Also strip dot at the end.
	$prefixes = preg_replace( [ '/^@/m', '/\s*[.]$/m' ], '', $prefixes );
	fwrite( $output, $prefixes );

	$dbr = $this->getDB( DB_REPLICA, [ 'vslow' ] );

	// Deletes go first because if the page was deleted, other changes
	// do not matter. This only gets true deletes, i.e. not pages that were restored.
	$this->handleDeletes( $dbr, $output );
	// Moves go before additions because if category is moved, we should not process creation
	// as it would produce wrong data - because create row has old title
	$this->handleMoves( $dbr, $output );
	// We need to handle restores too since delete may have happened in previous update.
	$this->handleRestores( $dbr, $output );
	// Process newly added pages
	$this->handleAdds( $dbr, $output );
	// Process page edits
	$this->handleEdits( $dbr, $output );
	// Process categorization changes
	$this->handleCategorization( $dbr, $output );

	// Update timestamp
	fwrite( $output, $this->updateTS( $this->endTS ) );

	// Flush and release the stream we opened above.
	fclose( $output );
}
170 
/**
 * Get the text of SPARQL INSERT DATA clause.
 *
 * @return string The accumulated RDF wrapped in SPARQL_INSERT, or an empty
 *  string when getRdf() returned nothing.
 */
private function getInsertRdf() {
	$triples = $this->getRdf();

	return $triples ? sprintf( self::SPARQL_INSERT, $triples ) : "";
}
182 
/**
 * Get SPARQL for updating set of categories.
 *
 * @param IDatabase $dbr
 * @param string[] $deleteUrls Category URLs (already wrapped in <>) to delete
 * @param string[] $pages Map of page ID => title whose parent links are written
 * @param string $mark Label placed in a leading "#" comment line
 * @return string SPARQL text, or an empty string when there is nothing to delete
 */
private function getCategoriesUpdate( IDatabase $dbr, $deleteUrls, $pages, $mark ) {
	if ( !$deleteUrls ) {
		return "";
	}

	if ( $pages ) {
		$this->writeParentCategories( $dbr, $pages );
	}

	$deleteClause = sprintf( self::SPARQL_DELETE, implode( ' ', $deleteUrls ) );
	$insertClause = $this->getInsertRdf();

	return "# $mark\n" . $deleteClause . $insertClause;
}
203 
/**
 * Write parent data for a set of categories.
 *
 * @param IDatabase $dbr
 * @param string[] $pages Map of page ID => category title
 */
private function writeParentCategories( IDatabase $dbr, $pages ) {
	$links = $this->getCategoryLinksIterator( $dbr, array_keys( $pages ) );

	foreach ( $links as $link ) {
		// cl_from is the ID of the child page, cl_to names the parent category.
		$childTitle = $pages[$link->cl_from];
		$this->categoriesRdf->writeCategoryLinkData( $childTitle, $link->cl_to );
	}
}
215 
/**
 * Generate SPARQL Update code for updating dump timestamp.
 *
 * The returned text first deletes any existing schema:dateModified value of
 * the dump URI and then inserts the new one as an xsd:dateTime literal.
 *
 * @param mixed $timestamp Timestamp to record; converted with wfTimestamp( TS_ISO_8601, ... )
 * @return string SPARQL Update query
 */
221  public function updateTS( $timestamp ) {
// The dump URI comes from the categories helper and is wrapped in <> for use as an IRI.
222  $dumpUrl = '<' . $this->categoriesRdf->getDumpURI() . '>';
223  $ts = wfTimestamp( TS_ISO_8601, $timestamp );
224  $tsQuery = <<<SPARQL
225 DELETE {
226  $dumpUrl schema:dateModified ?o .
227 }
228 WHERE {
229  $dumpUrl schema:dateModified ?o .
230 };
231 INSERT DATA {
232  $dumpUrl schema:dateModified "$ts"^^xsd:dateTime .
233 }
234 
235 SPARQL;
236  return $tsQuery;
237  }
238 
/**
 * Set up standard iterator for retrieving category changes.
 *
 * The iterator reads recentchanges within the configured time window,
 * left-joined to page_props (hiddencat only) and category.
 *
 * @param IDatabase $dbr
 * @param string[] $columns Extra columns to fetch, placed before the standard ones
 * @param string[] $extra_tables Extra tables to add to the query
 * @return BatchRowIterator
 */
private function setupChangesIterator(
	IDatabase $dbr,
	array $columns = [],
	array $extra_tables = []
) {
	$baseTables = [ 'recentchanges', 'page_props', 'category' ];
	$tables = $extra_tables ? array_merge( $baseTables, $extra_tables ) : $baseTables;

	$iterator = new BatchRowIterator( $dbr, $tables, [ 'rc_timestamp' ], $this->mBatchSize );
	$this->addTimestampConditions( $iterator, $dbr );

	$joins = [
		'page_props' => [
			'LEFT JOIN', [ 'pp_propname' => 'hiddencat', 'pp_page = rc_cur_id' ]
		],
		'category' => [
			'LEFT JOIN', [ 'cat_title = rc_title' ]
		],
	];
	$iterator->addJoinConditions( $joins );

	$standardColumns = [
		'rc_title',
		'rc_cur_id',
		'pp_propname',
		'cat_pages',
		'cat_subcats',
		'cat_files',
	];
	$iterator->setFetchColumns( array_merge( $columns, $standardColumns ) );

	return $iterator;
}
281 
/**
 * Fetch newly created categories.
 *
 * @param IDatabase $dbr
 * @return BatchRowIterator Standard changes iterator limited to rc_new rows in NS_CATEGORY
 */
protected function getNewCatsIterator( IDatabase $dbr ) {
	$conditions = [
		'rc_namespace' => NS_CATEGORY,
		'rc_new' => 1,
	];

	$iterator = $this->setupChangesIterator( $dbr );
	$iterator->addConditions( $conditions );

	return $iterator;
}
295 
/**
 * Fetch moved categories.
 *
 * Joins the page table so each row also carries the current title and
 * namespace of the moved page.
 *
 * @param IDatabase $dbr
 * @return BatchRowIterator
 */
protected function getMovedCatsIterator( IDatabase $dbr ) {
	$extraColumns = [ 'page_title', 'page_namespace' ];
	$iterator = $this->setupChangesIterator( $dbr, $extraColumns, [ 'page' ] );

	$conditions = [
		'rc_namespace' => NS_CATEGORY,
		'rc_new' => 0,
		'rc_log_type' => 'move',
		'rc_type' => RC_LOG,
	];
	$iterator->addConditions( $conditions );

	$pageJoin = [
		'page' => [ 'JOIN', 'rc_cur_id = page_id' ],
	];
	$iterator->addJoinConditions( $pageJoin );

	$this->addIndex( $iterator );

	return $iterator;
}
315 
/**
 * Fetch deleted categories.
 *
 * Reads recentchanges directly (no joins) and returns only delete log
 * entries whose page no longer exists.
 *
 * @param IDatabase $dbr
 * @return BatchRowIterator Iterator yielding rc_cur_id and rc_title
 */
protected function getDeletedCatsIterator( IDatabase $dbr ) {
	$iterator = new BatchRowIterator( $dbr, 'recentchanges', [ 'rc_timestamp' ], $this->mBatchSize );
	$this->addTimestampConditions( $iterator, $dbr );

	$conditions = [
		'rc_namespace' => NS_CATEGORY,
		'rc_new' => 0,
		'rc_log_type' => 'delete',
		'rc_log_action' => 'delete',
		'rc_type' => RC_LOG,
		// Only rows without a page record are wanted. A page record means the
		// category was restored, and the restore handler picks it up instead.
		'NOT EXISTS (SELECT * FROM page WHERE page_id = rc_cur_id)',
	];
	$iterator->addConditions( $conditions );

	$this->addIndex( $iterator );
	$iterator->setFetchColumns( [ 'rc_cur_id', 'rc_title' ] );

	return $iterator;
}
342 
/**
 * Fetch restored categories.
 *
 * @param IDatabase $dbr
 * @return BatchRowIterator Standard changes iterator limited to restore log
 *  entries whose page currently exists
 */
protected function getRestoredCatsIterator( IDatabase $dbr ) {
	$conditions = [
		'rc_namespace' => NS_CATEGORY,
		'rc_new' => 0,
		'rc_log_type' => 'delete',
		'rc_log_action' => 'restore',
		'rc_type' => RC_LOG,
		// Restrict to rows that still have a page record.
		'EXISTS (SELECT page_id FROM page WHERE page_id = rc_cur_id)',
	];

	$iterator = $this->setupChangesIterator( $dbr );
	$iterator->addConditions( $conditions );
	$this->addIndex( $iterator );

	return $iterator;
}
362 
/**
 * Fetch categorization changes or edits.
 *
 * @param IDatabase $dbr
 * @param int $type Value matched against rc_type (callers pass RC_EDIT or RC_CATEGORIZE)
 * @return BatchRowIterator
 */
protected function getChangedCatsIterator( IDatabase $dbr, $type ) {
	$conditions = [
		'rc_namespace' => NS_CATEGORY,
		'rc_new' => 0,
		'rc_type' => $type,
	];

	$iterator = $this->setupChangesIterator( $dbr );
	$iterator->addConditions( $conditions );
	$this->addIndex( $iterator );

	return $iterator;
}
379 
/**
 * Add timestamp limits to iterator.
 *
 * Restricts rc_timestamp to the half-open window [startTS, endTS).
 *
 * @param BatchRowIterator $it Iterator to constrain
 * @param IDatabase $dbr Used to format and quote the timestamps
 */
private function addTimestampConditions( BatchRowIterator $it, IDatabase $dbr ) {
	$it->addConditions( [
		'rc_timestamp >= ' . $dbr->addQuotes( $dbr->timestamp( $this->startTS ) ),
		'rc_timestamp < ' . $dbr->addQuotes( $dbr->timestamp( $this->endTS ) ),
	] );
}
391 
/**
 * Force the recentchanges index used by the query.
 *
 * Adds a USE INDEX option selecting new_name_timestamp for recentchanges.
 *
 * @param BatchRowIterator $it
 */
private function addIndex( BatchRowIterator $it ) {
	$indexHint = [ 'recentchanges' => 'new_name_timestamp' ];

	$it->addOptions( [ 'USE INDEX' => $indexHint ] );
}
401 
/**
 * Get iterator for links for categories.
 *
 * @param IDatabase $dbr
 * @param int[] $ids Page IDs matched against cl_from
 * @return Traversable Subcategory link entries with cl_from and cl_to; the
 *  batch iterator is wrapped in a RecursiveIteratorIterator before being returned
 */
protected function getCategoryLinksIterator( IDatabase $dbr, array $ids ) {
	$linkColumns = [ 'cl_from', 'cl_to' ];

	$batches = new BatchRowIterator( $dbr, 'categorylinks', $linkColumns, $this->mBatchSize );
	$batches->addConditions( [
		'cl_type' => 'subcat',
		'cl_from' => $ids
	] );
	$batches->setFetchColumns( [ 'cl_from', 'cl_to' ] );

	return new RecursiveIteratorIterator( $batches );
}
422 
/**
 * Get accumulated RDF.
 *
 * @return string Whatever the RDF writer's drain() returns
 */
public function getRdf() {
	$writer = $this->rdfWriter;

	return $writer->drain();
}
430 
/**
 * Handle category deletes.
 *
 * Only "true" deletes are covered, i.e. those where the page stays deleted.
 *
 * @param IDatabase $dbr
 * @param resource $output File to write the output
 */
public function handleDeletes( IDatabase $dbr, $output ) {
	foreach ( $this->getDeletedCatsIterator( $dbr ) as $rows ) {
		$urls = [];
		foreach ( $rows as $deleted ) {
			// Duplicate URLs may end up in the list; that is harmless.
			$urls[] = '<' . $this->categoriesRdf->labelToUrl( $deleted->rc_title ) . '>';
			$this->processed[$deleted->rc_cur_id] = true;
		}

		$sparql = $this->getCategoriesUpdate( $dbr, $urls, [], "Deletes" );
		fwrite( $output, $sparql );
	}
}
448 
/**
 * Write category data to RDF.
 *
 * @param stdClass $row Row with rc_title, pp_propname, cat_pages, cat_subcats and cat_files
 */
private function writeCategoryData( $row ) {
	$subcatCount = (int)$row->cat_subcats;
	$isHidden = $row->pp_propname === 'hiddencat';
	// cat_pages minus subcategories and files.
	$pageCount = (int)$row->cat_pages - $subcatCount - (int)$row->cat_files;

	$this->categoriesRdf->writeCategoryData( $row->rc_title, $isHidden, $pageCount, $subcatCount );
}
461 
/**
 * Handle category moves.
 *
 * The old title is always scheduled for deletion. Data for the new title is
 * written only when the page is still in the Category namespace and was not
 * handled earlier in this run.
 *
 * @param IDatabase $dbr
 * @param resource $output File to write the output
 */
public function handleMoves( IDatabase $dbr, $output ) {
	foreach ( $this->getMovedCatsIterator( $dbr ) as $rows ) {
		$urls = [];
		$moved = [];

		foreach ( $rows as $row ) {
			$pageId = $row->rc_cur_id;
			$urls[] = '<' . $this->categoriesRdf->labelToUrl( $row->rc_title ) . '>';

			// Skip pages already captured, and pages moved out of Category:
			// (for the latter the delete above is all that is needed).
			if ( isset( $this->processed[$pageId] ) || $row->page_namespace != NS_CATEGORY ) {
				continue;
			}

			// From here on the row describes the category under its new title.
			$row->rc_title = $row->page_title;
			$this->writeCategoryData( $row );
			$moved[$pageId] = $row->page_title;
			$this->processed[$pageId] = true;
		}

		$sparql = $this->getCategoriesUpdate( $dbr, $urls, $moved, "Moves" );
		fwrite( $output, $sparql );
	}
}
491 
/**
 * Handle category restores.
 *
 * Only restores that were not followed by another delete are found.
 *
 * @param IDatabase $dbr
 * @param resource $output File to write the output
 */
public function handleRestores( IDatabase $dbr, $output ) {
	fwrite( $output, "# Restores\n" );

	foreach ( $this->getRestoredCatsIterator( $dbr ) as $rows ) {
		$restored = [];

		foreach ( $rows as $row ) {
			$pageId = $row->rc_cur_id;
			if ( !isset( $this->processed[$pageId] ) ) {
				$this->writeCategoryData( $row );
				$restored[$pageId] = $row->rc_title;
				$this->processed[$pageId] = true;
			}
		}

		// Nothing new in this batch: emit nothing.
		if ( $restored ) {
			$this->writeParentCategories( $dbr, $restored );
			fwrite( $output, $this->getInsertRdf() );
		}
	}
}
520 
/**
 * Handle category additions.
 *
 * @param IDatabase $dbr
 * @param resource $output File to write the output
 */
public function handleAdds( IDatabase $dbr, $output ) {
	fwrite( $output, "# Additions\n" );

	foreach ( $this->getNewCatsIterator( $dbr ) as $rows ) {
		$added = [];

		foreach ( $rows as $row ) {
			$pageId = $row->rc_cur_id;
			if ( !isset( $this->processed[$pageId] ) ) {
				$this->writeCategoryData( $row );
				$added[$pageId] = $row->rc_title;
				$this->processed[$pageId] = true;
			}
		}

		// Nothing new in this batch: emit nothing.
		if ( $added ) {
			$this->writeParentCategories( $dbr, $added );
			fwrite( $output, $this->getInsertRdf() );
		}
	}
}
547 
/**
 * Handle edits for category texts.
 *
 * Editing a category can change the hidden flag and add new parents, so each
 * edited category is deleted and rewritten in full.
 * TODO: updating all edited categories is expensive and most edits are not
 * interesting here. Recategorization is captured by the categorization step,
 * but a change of hidden status is not.
 *
 * @param IDatabase $dbr
 * @param resource $output File to write the output
 */
public function handleEdits( IDatabase $dbr, $output ) {
	foreach ( $this->getChangedCatsIterator( $dbr, RC_EDIT ) as $rows ) {
		$edited = [];
		$urls = [];

		foreach ( $rows as $row ) {
			$pageId = $row->rc_cur_id;
			if ( !isset( $this->processed[$pageId] ) ) {
				$this->writeCategoryData( $row );
				$edited[$pageId] = $row->rc_title;
				$this->processed[$pageId] = true;
				$urls[] = '<' . $this->categoriesRdf->labelToUrl( $row->rc_title ) . '>';
			}
		}

		$sparql = $this->getCategoriesUpdate( $dbr, $urls, $edited, "Edits" );
		fwrite( $output, $sparql );
	}
}
577 
/**
 * Handles categorization changes.
 *
 * A categorization change can add new parents and change counts for the
 * parent category. Both the child page and the parent category of every
 * change are deleted and rewritten, including their parent links.
 *
 * @param IDatabase $dbr
 * @param resource $output File to write the output
 */
public function handleCategorization( IDatabase $dbr, $output ) {
	// Parent categories already written, keyed by title (they may lack a page ID).
	$processedTitle = [];

	foreach ( $this->getChangedCatsIterator( $dbr, RC_CATEGORIZE ) as $batch ) {
		/*
		 * Note that on categorization event, cur_id points to
		 * the child page, not the parent category!
		 * So we need to have a two-stage process, since we have ID from one
		 * category and title from another, and we need both for proper updates.
		 * TODO: For now, we do full update even though some data hasn't changed,
		 * e.g. parents for parent cat and counts for child cat.
		 */
		// Start from empty sets for every batch so the IN() lists below do
		// not keep growing with rows that earlier batches already handled.
		$childPages = [];
		$parentCats = [];
		foreach ( $batch as $row ) {
			$childPages[$row->rc_cur_id] = true;
			$parentCats[$row->rc_title] = true;
		}

		// Page ID => title of every category rewritten in this batch; passed to
		// getCategoriesUpdate() so the parent links removed by the DELETE
		// clause are written back.
		$pages = [];
		$deleteUrls = [];

		if ( $childPages ) {
			// Load child rows by ID
			$childRows = $dbr->select(
				[ 'page', 'page_props', 'category' ],
				[
					'page_id',
					'rc_title' => 'page_title',
					'pp_propname',
					'cat_pages',
					'cat_subcats',
					'cat_files',
				],
				[ 'page_namespace' => NS_CATEGORY, 'page_id' => array_keys( $childPages ) ],
				__METHOD__,
				[],
				[
					'page_props' => [
						'LEFT JOIN',
						[ 'pp_propname' => 'hiddencat', 'pp_page = page_id' ],
					],
					'category' => [
						'LEFT JOIN',
						[ 'cat_title = page_title' ],
					],
				]
			);
			foreach ( $childRows as $row ) {
				if ( isset( $this->processed[$row->page_id] ) ) {
					// We already captured this one before
					continue;
				}
				$this->writeCategoryData( $row );
				$pages[$row->page_id] = $row->rc_title;
				$deleteUrls[] = '<' . $this->categoriesRdf->labelToUrl( $row->rc_title ) . '>';
				$this->processed[$row->page_id] = true;
			}
		}

		if ( $parentCats ) {
			// Load parent rows by title
			$parentRows = $dbr->select(
				[ 'category', 'page', 'page_props' ],
				[
					'page_id',
					'rc_title' => 'cat_title',
					'pp_propname',
					'cat_pages',
					'cat_subcats',
					'cat_files',
				],
				[ 'cat_title' => array_keys( $parentCats ) ],
				__METHOD__,
				[],
				[
					'page' => [
						'LEFT JOIN',
						[ 'page_title = cat_title', 'page_namespace' => NS_CATEGORY ],
					],
					'page_props' => [
						'LEFT JOIN',
						[ 'pp_propname' => 'hiddencat', 'pp_page = page_id' ],
					],
				]
			);
			foreach ( $parentRows as $row ) {
				if ( $row->page_id && isset( $this->processed[$row->page_id] ) ) {
					// We already captured this one before
					continue;
				}
				if ( isset( $processedTitle[$row->rc_title] ) ) {
					// We already captured this one before
					continue;
				}
				$this->writeCategoryData( $row );
				$deleteUrls[] = '<' . $this->categoriesRdf->labelToUrl( $row->rc_title ) . '>';
				if ( $row->page_id ) {
					// Parent links can only be looked up for categories that have a page.
					$pages[$row->page_id] = $row->rc_title;
					$this->processed[$row->page_id] = true;
				}
				$processedTitle[$row->rc_title] = true;
			}
		}

		fwrite( $output, $this->getCategoriesUpdate( $dbr, $deleteUrls, $pages, "Changes" ) );
	}
}
692 }
693 
695 require_once RUN_MAINTENANCE_IF_MAIN;
CategoryChangesAsRdf\getDeletedCatsIterator
getDeletedCatsIterator(IDatabase $dbr)
Fetch deleted categories.
Definition: categoryChangesAsRdf.php:321
CategoryChangesAsRdf\getRdf
getRdf()
Get accumulated RDF.
Definition: categoryChangesAsRdf.php:427
MWTimestamp
Library for creating and parsing MW-style timestamps.
Definition: MWTimestamp.php:32
CategoriesRdf
Helper class to produce RDF representation of categories.
Definition: CategoriesRdf.php:24
CategoryChangesAsRdf\getInsertRdf
getInsertRdf()
Get the text of SPARQL INSERT DATA clause.
Definition: categoryChangesAsRdf.php:175
use
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
Definition: APACHE-LICENSE-2.0.txt:10
CategoryChangesAsRdf\writeParentCategories
writeParentCategories(IDatabase $dbr, $pages)
Write parent data for a set of categories.
Definition: categoryChangesAsRdf.php:210
Maintenance\addDescription
addDescription( $text)
Set the description text.
Definition: Maintenance.php:329
$tables
this hook is for auditing only RecentChangesLinked and Watchlist Do not use this to implement individual filters if they are compatible with the ChangesListFilter and ChangesListFilterGroup structure use sub classes of those in conjunction with the ChangesListSpecialPageStructuredFilters hook This hook can be used to implement filters that do not implement that or custom behavior that is not an individual filter e g Watchlist & $tables
Definition: hooks.txt:996
CategoryChangesAsRdf\getNewCatsIterator
getNewCatsIterator(IDatabase $dbr)
Fetch newly created categories.
Definition: categoryChangesAsRdf.php:287
wfTimestamp
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Definition: GlobalFunctions.php:1924
BatchRowIterator\addConditions
addConditions(array $conditions)
Definition: BatchRowIterator.php:111
RC_LOG
const RC_LOG
Definition: Defines.php:153
BatchRowIterator
Definition: BatchRowIterator.php:29
RUN_MAINTENANCE_IF_MAIN
require_once RUN_MAINTENANCE_IF_MAIN
Definition: maintenance.txt:50
RC_EDIT
const RC_EDIT
Definition: Defines.php:151
Maintenance
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
Definition: maintenance.txt:39
CategoryChangesAsRdf\$processed
int[] $processed
List of processed page IDs, so we don't try to process same thing twice.
Definition: categoryChangesAsRdf.php:90
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:37
Wikimedia\Rdbms\IDatabase
Basic database interface for live and lazy-loaded relation database handles.
Definition: IDatabase.php:38
CategoryChangesAsRdf\getMovedCatsIterator
getMovedCatsIterator(IDatabase $dbr)
Fetch moved categories.
Definition: categoryChangesAsRdf.php:301
CategoryChangesAsRdf\getRestoredCatsIterator
getRestoredCatsIterator(IDatabase $dbr)
Fetch restored categories.
Definition: categoryChangesAsRdf.php:348
$dbr
$dbr
Definition: testCompression.php:50
$output
static configuration should be added through ResourceLoaderGetConfigVars instead can be used to get the real title e g db for database replication lag or jobqueue for job queue size converted to pseudo seconds It is possible to add more fields and they will be returned to the user in the API response after the basic globals have been set but before ordinary actions take place $output
Definition: hooks.txt:2272
CategoryChangesAsRdf\getCategoriesUpdate
getCategoriesUpdate(IDatabase $dbr, $deleteUrls, $pages, $mark)
Get SPARQL for updating set of categories.
Definition: categoryChangesAsRdf.php:191
CategoryChangesAsRdf\updateTS
updateTS( $timestamp)
Generate SPARQL Update code for updating dump timestamp.
Definition: categoryChangesAsRdf.php:221
CategoryChangesAsRdf\addTimestampConditions
addTimestampConditions(BatchRowIterator $it, IDatabase $dbr)
Add timestamp limits to iterator.
Definition: categoryChangesAsRdf.php:385
Maintenance\addOption
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
Definition: Maintenance.php:248
CategoryChangesAsRdf\execute
execute()
Do the actual work.
Definition: categoryChangesAsRdf.php:115
CategoryChangesAsRdf\$categoriesRdf
CategoriesRdf $categoriesRdf
Categories RDF helper.
Definition: categoryChangesAsRdf.php:80
CategoryChangesAsRdf\handleAdds
handleAdds(IDatabase $dbr, $output)
Definition: categoryChangesAsRdf.php:525
CategoryChangesAsRdf\SPARQL_DELETE_INSERT
const SPARQL_DELETE_INSERT
Delete/Insert query.
Definition: categoryChangesAsRdf.php:59
CategoryChangesAsRdf
Maintenance script to provide RDF representation of the recent changes in category tree.
Definition: categoryChangesAsRdf.php:31
CategoryChangesAsRdf\SPARQL_INSERT
const SPARQL_INSERT
Insert query.
Definition: categoryChangesAsRdf.php:35
CategoryChangesAsRdf\handleRestores
handleRestores(IDatabase $dbr, $output)
Definition: categoryChangesAsRdf.php:496
BatchRowIterator\addOptions
addOptions(array $options)
Definition: BatchRowIterator.php:119
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
NS_CATEGORY
const NS_CATEGORY
Definition: Defines.php:87
array
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
CategoryChangesAsRdf\handleMoves
handleMoves(IDatabase $dbr, $output)
Definition: categoryChangesAsRdf.php:466
CategoryChangesAsRdf\getCategoryLinksIterator
getCategoryLinksIterator(IDatabase $dbr, array $ids)
Get iterator for links for categories.
Definition: categoryChangesAsRdf.php:408
CategoryChangesAsRdf\$startTS
$startTS
Definition: categoryChangesAsRdf.php:82
CategoryChangesAsRdf\writeCategoryData
writeCategoryData( $row)
Write category data to RDF.
Definition: categoryChangesAsRdf.php:453
$wgRCMaxAge
$wgRCMaxAge
Recentchanges items are periodically purged; entries older than this many seconds will go.
Definition: DefaultSettings.php:6790
CategoryChangesAsRdf\$rdfWriter
RdfWriter $rdfWriter
Definition: categoryChangesAsRdf.php:75
DELETE
DELETE
Definition: updatets.txt:1
CategoryChangesAsRdf\getChangedCatsIterator
getChangedCatsIterator(IDatabase $dbr, $type)
Fetch categorization changes or edits.
Definition: categoryChangesAsRdf.php:368
CategoryChangesAsRdf\setupChangesIterator
setupChangesIterator(IDatabase $dbr, array $columns=[], array $extra_tables=[])
Set up standard iterator for retrieving category changes.
Definition: categoryChangesAsRdf.php:246
CategoryChangesAsRdf\$endTS
$endTS
Definition: categoryChangesAsRdf.php:83
$maintClass
$maintClass
Definition: categoryChangesAsRdf.php:694
CategoryChangesAsRdf\SPARQL_DELETE
const SPARQL_DELETE
Delete query.
Definition: categoryChangesAsRdf.php:45
Maintenance\getOption
getOption( $name, $default=null)
Get an option, or return the default.
Definition: Maintenance.php:283
CategoryChangesAsRdf\initialize
initialize()
Initialize external service classes.
Definition: categoryChangesAsRdf.php:109
CategoryChangesAsRdf\handleEdits
handleEdits(IDatabase $dbr, $output)
Handle edits for category texts.
Definition: categoryChangesAsRdf.php:553
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:22
Maintenance\getDB
getDB( $db, $groups=[], $wiki=false)
Returns a database to be used by current maintenance script.
Definition: Maintenance.php:1373
$batch
$batch
Definition: linkcache.txt:23
Maintenance\error
error( $err, $die=0)
Throw an error to the user.
Definition: Maintenance.php:462
class
you have access to all of the normal MediaWiki so you can get a DB use the etc For full docs on the Maintenance class
Definition: maintenance.txt:56
RC_CATEGORIZE
const RC_CATEGORIZE
Definition: Defines.php:155
CategoryChangesAsRdf\handleCategorization
handleCategorization(IDatabase $dbr, $output)
Handles categorization changes.
Definition: categoryChangesAsRdf.php:583
CategoryChangesAsRdf\__construct
__construct()
Default constructor.
Definition: categoryChangesAsRdf.php:92
CategoryChangesAsRdf\handleDeletes
handleDeletes(IDatabase $dbr, $output)
Handle category deletes.
Definition: categoryChangesAsRdf.php:436
Maintenance\setBatchSize
setBatchSize( $s=0)
Set the batch size.
Definition: Maintenance.php:375
CategoryChangesAsRdf\addIndex
addIndex(BatchRowIterator $it)
Need to force index, somehow on terbium the optimizer chooses wrong one.
Definition: categoryChangesAsRdf.php:396
$type
$type
Definition: testCompression.php:48