MediaWiki  master
categoryChangesAsRdf.php
Go to the documentation of this file.
1 <?php
19 use Wikimedia\Purtle\RdfWriter;
20 use Wikimedia\Purtle\TurtleRdfWriter;
22 
23 require_once __DIR__ . '/Maintenance.php';
24 
/**
 * Insert query.
 * Template for sprintf(): %s is replaced with the RDF text to insert.
 */
35  private const SPARQL_INSERT = <<<SPARQL
36 INSERT DATA {
37 %s
38 };
39 
40 SPARQL;
41 
/**
 * Delete query.
 * Template for sprintf(): %s is replaced with the space-separated list of
 * category URLs; every triple having one of them as subject is deleted.
 */
45  private const SPARQL_DELETE = <<<SPARQLD
46 DELETE {
47 ?category ?x ?y
48 } WHERE {
49  ?category ?x ?y
50  VALUES ?category {
51  %s
52  }
53 };
54 
55 SPARQLD;
56 
/**
 * RDF writer that accumulates output until it is drained by getRdf().
 * @var RdfWriter
 */
60  private $rdfWriter;
/**
 * Categories RDF helper.
 * @var CategoriesRdf
 */
65  private $categoriesRdf;
66 
/** Start of the processed time range (inclusive); set in execute() from the 'start' option. */
67  private $startTS;
/** End of the processed time range (exclusive); set in execute() from the 'end' option. */
68  private $endTS;
69 
/**
 * List of processed page IDs, so we don't try to process same thing twice.
 * Keys are page IDs; the handlers store `true` as the value.
 */
75  protected $processed = [];
76 
/**
 * Default constructor.
 *
 * Registers the script description, the batch size and the command-line
 * options (output, start, end).
 */
public function __construct() {
    parent::__construct();

    $this->addDescription( "Generate RDF dump of category changes in a wiki." );

    $this->setBatchSize( 200 );

    // Arguments: name, description, required, withArg, shortName.
    $this->addOption(
        'output',
        "Output file (default is stdout). Will be overwritten.",
        false,
        true,
        'o'
    );
    $this->addOption(
        'start',
        'Starting timestamp (inclusive), in ISO or Mediawiki format.',
        true,
        true,
        's'
    );
    $this->addOption(
        'end',
        'Ending timestamp (exclusive), in ISO or Mediawiki format.',
        true,
        true,
        'e'
    );
}
90 
/**
 * Initialize external service classes.
 *
 * Creates the RDF writer and the categories RDF helper that shares it.
 */
public function initialize() {
    // A Turtle writer is sufficient because SPARQL Update syntax is close to Turtle.
    $writer = new TurtleRdfWriter();

    $this->rdfWriter = $writer;
    $this->categoriesRdf = new CategoriesRdf( $writer );
}
99 
/**
 * Do the actual work.
 *
 * Writes the prefix declarations, then the SPARQL Update statements for
 * deletes, moves, restores, additions, edits and categorization changes
 * (in that order), and finally the statement updating the dump timestamp.
 *
 * The output stream is verified after opening and is always closed before
 * returning, including when one of the handlers throws.
 */
public function execute() {
    $this->initialize();
    $startTS = new MWTimestamp( $this->getOption( "start" ) );

    $endTS = new MWTimestamp( $this->getOption( "end" ) );
    $now = new MWTimestamp();
    $rcMaxAge = $this->getConfig()->get( 'RCMaxAge' );

    // NOTE(review): error() is called without a $die value, so these look like
    // warnings only and processing continues - confirm that this is intended.
    if ( $now->getTimestamp() - $startTS->getTimestamp() > $rcMaxAge ) {
        $this->error( "Start timestamp too old, maximum RC age is $rcMaxAge!" );
    }
    if ( $now->getTimestamp() - $endTS->getTimestamp() > $rcMaxAge ) {
        $this->error( "End timestamp too old, maximum RC age is $rcMaxAge!" );
    }

    $this->startTS = $startTS->getTimestamp();
    $this->endTS = $endTS->getTimestamp();

    $outFile = $this->getOption( 'output', 'php://stdout' );
    if ( $outFile === '-' ) {
        $outFile = 'php://stdout';
    }

    $output = fopen( $outFile, 'wb' );
    if ( $output === false ) {
        // Without this check every fwrite() below would operate on `false`.
        $this->fatalError( "Could not open output file '$outFile' for writing." );
    }

    try {
        $this->categoriesRdf->setupPrefixes();
        $this->rdfWriter->start();

        $prefixes = $this->getRdf();
        // We have to strip @ from prefix, since SPARQL UPDATE doesn't use them
        // Also strip dot at the end.
        $prefixes = preg_replace( [ '/^@/m', '/\s*[.]$/m' ], '', $prefixes );
        fwrite( $output, $prefixes );

        $dbr = $this->getDB( DB_REPLICA, [ 'vslow' ] );

        // Deletes go first because if the page was deleted, other changes
        // do not matter. This only gets true deletes, i.e. not pages that were restored.
        $this->handleDeletes( $dbr, $output );
        // Moves go before additions because if category is moved, we should not process creation
        // as it would produce wrong data - because create row has old title
        $this->handleMoves( $dbr, $output );
        // We need to handle restores too since delete may have happened in previous update.
        $this->handleRestores( $dbr, $output );
        // Process newly added pages
        $this->handleAdds( $dbr, $output );
        // Process page edits
        $this->handleEdits( $dbr, $output );
        // Process categorization changes
        $this->handleCategorization( $dbr, $output );

        // Update timestamp
        fwrite( $output, $this->updateTS( $this->endTS ) );
    } finally {
        // Flush and release the stream even if a handler failed.
        fclose( $output );
    }
}
154 
/**
 * Get the text of SPARQL INSERT DATA clause.
 *
 * Drains the accumulated RDF via getRdf() and wraps it in the
 * SPARQL_INSERT template.
 *
 * @return string The INSERT DATA statement, or an empty string when no RDF
 *  has been accumulated.
 */
private function getInsertRdf() {
    $accumulated = $this->getRdf();

    return $accumulated
        ? sprintf( self::SPARQL_INSERT, $accumulated )
        : "";
}
166 
/**
 * Get SPARQL for updating set of categories.
 *
 * @param IDatabase $dbr Database used to look up parent categories
 * @param string[] $deleteUrls URLs (already wrapped in <>) of categories to delete
 * @param string[] $pages Category titles keyed by page ID; their parent links are
 *  written before the INSERT clause is produced
 * @param string $mark Label emitted as a "# ..." comment line before the statements
 * @return string DELETE statement followed by the INSERT statement, or an empty
 *  string when there is nothing to delete
 */
private function getCategoriesUpdate( IDatabase $dbr, $deleteUrls, $pages, $mark ) {
    if ( !$deleteUrls ) {
        return "";
    }

    if ( $pages ) {
        $this->writeParentCategories( $dbr, $pages );
    }

    $deleteClause = sprintf( self::SPARQL_DELETE, implode( ' ', $deleteUrls ) );
    $insertClause = $this->getInsertRdf();

    return "# $mark\n" . $deleteClause . $insertClause;
}
187 
/**
 * Write parent data for a set of categories.
 *
 * The written data is buffered in the RDF writer; nothing is output here.
 *
 * @param IDatabase $dbr
 * @param string[] $pages Category titles keyed by page ID
 */
private function writeParentCategories( IDatabase $dbr, $pages ) {
    $links = $this->getCategoryLinksIterator( $dbr, array_keys( $pages ), __METHOD__ );

    foreach ( $links as $link ) {
        // cl_from is the child page ID, cl_to is what gets written as its parent.
        $childTitle = $pages[$link->cl_from];
        $this->categoriesRdf->writeCategoryLinkData( $childTitle, $link->cl_to );
    }
}
199 
/**
 * Generate SPARQL Update code for updating dump timestamp.
 *
 * @param int|string $timestamp Timestamp to record; it is passed to wfTimestamp()
 *  for conversion to ISO 8601
 * @return string SPARQL Update text that deletes the existing schema:dateModified
 *  value of the dump URI and inserts the new one
 */
205  public function updateTS( $timestamp ) {
206  $dumpUrl = '<' . $this->categoriesRdf->getDumpURI() . '>';
207  $ts = wfTimestamp( TS_ISO_8601, $timestamp );
// $dumpUrl and $ts are interpolated into the heredoc below.
208  $tsQuery = <<<SPARQL
209 DELETE {
210  $dumpUrl schema:dateModified ?o .
211 }
212 WHERE {
213  $dumpUrl schema:dateModified ?o .
214 };
215 INSERT DATA {
216  $dumpUrl schema:dateModified "$ts"^^xsd:dateTime .
217 }
218 
219 SPARQL;
220  return $tsQuery;
221  }
222 
/**
 * Set up standard iterator for retrieving category changes.
 *
 * The iterator reads recentchanges in batches keyed by rc_timestamp, limited
 * to the configured time range, with page_props (hiddencat) and category
 * left-joined in.
 *
 * @param IDatabase $dbr
 * @param string[] $columns Extra columns to fetch, placed before the standard ones
 * @param string[] $extra_tables Extra tables to add to the query
 * @param string $fname Name of the calling function
 * @return BatchRowIterator
 */
private function setupChangesIterator(
    IDatabase $dbr,
    array $columns = [],
    array $extra_tables = [],
    $fname = __METHOD__
) {
    $baseTables = [ 'recentchanges', 'page_props', 'category' ];
    $tables = $extra_tables ? array_merge( $baseTables, $extra_tables ) : $baseTables;

    $joins = [
        'page_props' => [
            'LEFT JOIN', [ 'pp_propname' => 'hiddencat', 'pp_page = rc_cur_id' ]
        ],
        'category' => [
            'LEFT JOIN', [ 'cat_title = rc_title' ]
        ]
    ];

    $standardColumns = [
        'rc_title',
        'rc_cur_id',
        'pp_propname',
        'cat_pages',
        'cat_subcats',
        'cat_files'
    ];

    $it = new BatchRowIterator( $dbr, $tables, [ 'rc_timestamp' ], $this->mBatchSize );
    $this->addTimestampConditions( $it, $dbr );
    $it->addJoinConditions( $joins );
    $it->setFetchColumns( array_merge( $columns, $standardColumns ) );
    $it->setCaller( $fname );

    return $it;
}
268 
/**
 * Fetch newly created categories.
 *
 * @param IDatabase $dbr
 * @param string $fname Name of the calling function
 * @return BatchRowIterator
 */
protected function getNewCatsIterator( IDatabase $dbr, $fname ) {
    $newCategoryConditions = [
        'rc_namespace' => NS_CATEGORY,
        'rc_new' => 1,
    ];

    $it = $this->setupChangesIterator( $dbr, [], [], $fname );
    $it->addConditions( $newCategoryConditions );

    return $it;
}
283 
/**
 * Fetch moved categories.
 *
 * Besides the standard columns, each row carries page_title and
 * page_namespace of the page the change points to (joined on rc_cur_id).
 *
 * @param IDatabase $dbr
 * @param string $fname Name of the calling function
 * @return BatchRowIterator
 */
protected function getMovedCatsIterator( IDatabase $dbr, $fname ) {
    $extraColumns = [ 'page_title', 'page_namespace' ];
    $extraTables = [ 'page' ];
    $moveConditions = [
        'rc_namespace' => NS_CATEGORY,
        'rc_new' => 0,
        'rc_log_type' => 'move',
        'rc_type' => RC_LOG,
    ];
    $pageJoin = [
        'page' => [ 'JOIN', 'rc_cur_id = page_id' ],
    ];

    $it = $this->setupChangesIterator( $dbr, $extraColumns, $extraTables, $fname );
    $it->addConditions( $moveConditions );
    $it->addJoinConditions( $pageJoin );
    $this->addIndex( $it );

    return $it;
}
309 
/**
 * Fetch deleted categories.
 *
 * Only deletions whose page record no longer exists are returned; rows
 * carry rc_cur_id and rc_title.
 *
 * @param IDatabase $dbr
 * @param string $fname Name of the calling function
 * @return BatchRowIterator
 */
protected function getDeletedCatsIterator( IDatabase $dbr, $fname ) {
    $deleteConditions = [
        'rc_namespace' => NS_CATEGORY,
        'rc_new' => 0,
        'rc_log_type' => 'delete',
        'rc_log_action' => 'delete',
        'rc_type' => RC_LOG,
        // We will fetch ones that do not have page record. If they do,
        // this means they were restored, thus restoring handler will pick it up.
        'NOT EXISTS (SELECT * FROM page WHERE page_id = rc_cur_id)',
    ];

    $it = new BatchRowIterator( $dbr, 'recentchanges', [ 'rc_timestamp' ], $this->mBatchSize );
    $this->addTimestampConditions( $it, $dbr );
    $it->addConditions( $deleteConditions );
    $this->addIndex( $it );
    $it->setFetchColumns( [ 'rc_cur_id', 'rc_title' ] );
    $it->setCaller( $fname );

    return $it;
}
338 
/**
 * Fetch restored categories.
 *
 * @param IDatabase $dbr
 * @param string $fname Name of the calling function
 * @return BatchRowIterator
 */
protected function getRestoredCatsIterator( IDatabase $dbr, $fname ) {
    $restoreConditions = [
        'rc_namespace' => NS_CATEGORY,
        'rc_new' => 0,
        'rc_log_type' => 'delete',
        'rc_log_action' => 'restore',
        'rc_type' => RC_LOG,
        // We will only fetch ones that have page record
        'EXISTS (SELECT page_id FROM page WHERE page_id = rc_cur_id)',
    ];

    $it = $this->setupChangesIterator( $dbr, [], [], $fname );
    $it->addConditions( $restoreConditions );
    $this->addIndex( $it );

    return $it;
}
359 
/**
 * Fetch categorization changes or edits.
 *
 * @param IDatabase $dbr
 * @param int $type Value matched against rc_type; callers in this file pass
 *  RC_EDIT or RC_CATEGORIZE
 * @param string $fname Name of the calling function
 * @return BatchRowIterator
 */
protected function getChangedCatsIterator( IDatabase $dbr, $type, $fname ) {
    $changeConditions = [
        'rc_namespace' => NS_CATEGORY,
        'rc_new' => 0,
        'rc_type' => $type,
    ];

    $it = $this->setupChangesIterator( $dbr, [], [], $fname );
    $it->addConditions( $changeConditions );
    $this->addIndex( $it );

    return $it;
}
377 
384  $it->addConditions( [
385  'rc_timestamp >= ' . $dbr->addQuotes( $dbr->timestamp( $this->startTS ) ),
386  'rc_timestamp < ' . $dbr->addQuotes( $dbr->timestamp( $this->endTS ) ),
387  ] );
388  }
389 
/**
 * Need to force index, somehow on terbium the optimizer chooses wrong one.
 *
 * Adds a USE INDEX option selecting new_name_timestamp for recentchanges.
 *
 * @param BatchRowIterator $it Iterator to modify
 */
private function addIndex( BatchRowIterator $it ) {
    $indexHint = [ 'recentchanges' => 'new_name_timestamp' ];

    $it->addOptions( [ 'USE INDEX' => $indexHint ] );
}
399 
/**
 * Get iterator for links for categories.
 *
 * Only 'subcat' links are selected. The batch iterator is wrapped in a
 * RecursiveIteratorIterator; writeParentCategories() consumes the result
 * one row (cl_from, cl_to) at a time.
 *
 * @param IDatabase $dbr
 * @param int[] $ids Page IDs matched against cl_from
 * @param string $fname Name of the calling function
 * @return Traversable
 */
protected function getCategoryLinksIterator( IDatabase $dbr, array $ids, $fname ) {
    $linkColumns = [ 'cl_from', 'cl_to' ];

    $batches = new BatchRowIterator( $dbr, 'categorylinks', $linkColumns, $this->mBatchSize );
    $batches->addConditions( [
        'cl_type' => 'subcat',
        'cl_from' => $ids
    ] );
    $batches->setFetchColumns( [ 'cl_from', 'cl_to' ] );
    $batches->setCaller( $fname );

    return new RecursiveIteratorIterator( $batches );
}
422 
/**
 * Get accumulated RDF.
 *
 * @return string Whatever the RDF writer returns from drain()
 */
public function getRdf() {
    $accumulated = $this->rdfWriter->drain();

    return $accumulated;
}
430 
/**
 * Handle category deletes.
 *
 * Covers only "true" deletes, i.e. pages that stayed deleted. Every handled
 * page ID is recorded in $this->processed.
 *
 * @param IDatabase $dbr
 * @param resource $output File handle to write to
 */
public function handleDeletes( IDatabase $dbr, $output ) {
    $batches = $this->getDeletedCatsIterator( $dbr, __METHOD__ );

    foreach ( $batches as $rows ) {
        $urls = [];
        foreach ( $rows as $deleted ) {
            // Duplicate URLs are possible here and are harmless.
            $urls[] = '<' . $this->categoriesRdf->labelToUrl( $deleted->rc_title ) . '>';
            $this->processed[$deleted->rc_cur_id] = true;
        }

        $sparql = $this->getCategoriesUpdate( $dbr, $urls, [], "Deletes" );
        fwrite( $output, $sparql );
    }
}
449 
/**
 * Write category data to RDF.
 *
 * @param stdClass $row Row providing rc_title, pp_propname, cat_pages,
 *  cat_subcats and cat_files
 */
private function writeCategoryData( $row ) {
    $isHidden = $row->pp_propname === 'hiddencat';
    $subcatCount = (int)$row->cat_subcats;
    // cat_pages minus subcategories and files.
    $pageCount = (int)$row->cat_pages - $subcatCount - (int)$row->cat_files;

    $this->categoriesRdf->writeCategoryData(
        $row->rc_title,
        $isHidden,
        $pageCount,
        $subcatCount
    );
}
462 
/**
 * Handle category moves.
 *
 * The old title is always scheduled for deletion. Data for the new title is
 * written only when the page is still in the category namespace and was not
 * handled earlier in this run.
 *
 * @param IDatabase $dbr
 * @param resource $output File handle to write to
 */
public function handleMoves( IDatabase $dbr, $output ) {
    $batches = $this->getMovedCatsIterator( $dbr, __METHOD__ );

    foreach ( $batches as $rows ) {
        $pages = [];
        $deleteUrls = [];

        foreach ( $rows as $moved ) {
            $deleteUrls[] = '<' . $this->categoriesRdf->labelToUrl( $moved->rc_title ) . '>';

            // Skip pages captured before, and pages moved out of Category:
            // (for the latter the delete above is all that is needed).
            $alreadyDone = isset( $this->processed[$moved->rc_cur_id] );
            if ( $alreadyDone || $moved->page_namespace != NS_CATEGORY ) {
                continue;
            }

            // From here on the row describes the category under its new title.
            $moved->rc_title = $moved->page_title;
            $this->writeCategoryData( $moved );
            $pages[$moved->rc_cur_id] = $moved->page_title;
            $this->processed[$moved->rc_cur_id] = true;
        }

        $sparql = $this->getCategoriesUpdate( $dbr, $deleteUrls, $pages, "Moves" );
        fwrite( $output, $sparql );
    }
}
492 
/**
 * Handle category restores.
 *
 * Only restores that were not followed by another delete are found.
 *
 * @param IDatabase $dbr
 * @param resource $output File handle to write to
 */
public function handleRestores( IDatabase $dbr, $output ) {
    fwrite( $output, "# Restores\n" );

    $batches = $this->getRestoredCatsIterator( $dbr, __METHOD__ );

    foreach ( $batches as $rows ) {
        $pages = [];
        foreach ( $rows as $restored ) {
            $pageId = $restored->rc_cur_id;
            if ( isset( $this->processed[$pageId] ) ) {
                // Captured by an earlier handler or batch.
                continue;
            }
            $this->writeCategoryData( $restored );
            $pages[$pageId] = $restored->rc_title;
            $this->processed[$pageId] = true;
        }

        if ( $pages ) {
            $this->writeParentCategories( $dbr, $pages );

            fwrite( $output, $this->getInsertRdf() );
        }
    }
}
522 
/**
 * Handle newly added categories.
 *
 * @param IDatabase $dbr
 * @param resource $output File handle to write to
 */
public function handleAdds( IDatabase $dbr, $output ) {
    fwrite( $output, "# Additions\n" );

    $batches = $this->getNewCatsIterator( $dbr, __METHOD__ );

    foreach ( $batches as $rows ) {
        $pages = [];
        foreach ( $rows as $added ) {
            $pageId = $added->rc_cur_id;
            if ( isset( $this->processed[$pageId] ) ) {
                // Captured by an earlier handler or batch.
                continue;
            }
            $this->writeCategoryData( $added );
            $pages[$pageId] = $added->rc_title;
            $this->processed[$pageId] = true;
        }

        if ( $pages ) {
            $this->writeParentCategories( $dbr, $pages );
            fwrite( $output, $this->getInsertRdf() );
        }
    }
}
550 
/**
 * Handle edits for category texts.
 *
 * Editing category can change hidden flag and add new parents, so every
 * edited category is deleted and re-inserted in full.
 * TODO: it's pretty expensive to update all edited categories, and most edits
 * aren't actually interesting for us. Some way to know which are interesting?
 * We can capture recategorization on the next step, but not change in hidden status.
 *
 * @param IDatabase $dbr
 * @param resource $output File handle to write to
 */
public function handleEdits( IDatabase $dbr, $output ) {
    $batches = $this->getChangedCatsIterator( $dbr, RC_EDIT, __METHOD__ );

    foreach ( $batches as $rows ) {
        $pages = [];
        $deleteUrls = [];

        foreach ( $rows as $edited ) {
            $pageId = $edited->rc_cur_id;
            if ( isset( $this->processed[$pageId] ) ) {
                // Captured by an earlier handler or batch.
                continue;
            }

            $this->writeCategoryData( $edited );
            $pages[$pageId] = $edited->rc_title;
            $this->processed[$pageId] = true;
            $deleteUrls[] = '<' . $this->categoriesRdf->labelToUrl( $edited->rc_title ) . '>';
        }

        $sparql = $this->getCategoriesUpdate( $dbr, $deleteUrls, $pages, "Edits" );
        fwrite( $output, $sparql );
    }
}
581 
/**
 * Handles categorization changes.
 *
 * Categorization change can add new parents and change counts for the
 * parent category.
 *
 * Note that on categorization event, cur_id points to the child page, not
 * the parent category! So each batch is handled in two stages: child rows
 * are loaded by page ID and parent rows by title, since both are needed for
 * proper updates.
 * TODO: For now, we do full update even though some data hasn't changed,
 * e.g. parents for parent cat and counts for child cat.
 *
 * @param IDatabase $dbr
 * @param resource $output File handle to write to
 */
public function handleCategorization( IDatabase $dbr, $output ) {
    // Parent titles already written; shared across all batches.
    $seenTitles = [];

    $batches = $this->getChangedCatsIterator( $dbr, RC_CATEGORIZE, __METHOD__ );

    foreach ( $batches as $batch ) {
        // Collect the two sides of every change in this batch.
        $childIds = [];
        $parentTitles = [];
        foreach ( $batch as $change ) {
            $childIds[$change->rc_cur_id] = true;
            $parentTitles[$change->rc_title] = true;
        }

        $pages = [];
        $deleteUrls = [];

        if ( $childIds ) {
            // Stage 1: load child rows by ID
            $childJoins = [
                'page_props' => [
                    'LEFT JOIN',
                    [ 'pp_propname' => 'hiddencat', 'pp_page = page_id' ],
                ],
                'category' => [
                    'LEFT JOIN',
                    [ 'cat_title = page_title' ],
                ],
            ];
            $childRows = $dbr->select(
                [ 'page', 'page_props', 'category' ],
                [
                    'page_id',
                    'rc_title' => 'page_title',
                    'pp_propname',
                    'cat_pages',
                    'cat_subcats',
                    'cat_files',
                ],
                [ 'page_namespace' => NS_CATEGORY, 'page_id' => array_keys( $childIds ) ],
                __METHOD__,
                [],
                $childJoins
            );

            foreach ( $childRows as $child ) {
                if ( isset( $this->processed[$child->page_id] ) ) {
                    // Captured by an earlier handler or batch.
                    continue;
                }
                $this->writeCategoryData( $child );
                if ( $child->page_id ) {
                    $pages[$child->page_id] = $child->rc_title;
                    $deleteUrls[] = '<' . $this->categoriesRdf->labelToUrl( $child->rc_title ) . '>';
                    $this->processed[$child->page_id] = true;
                }
            }
        }

        if ( $parentTitles ) {
            // Stage 2: load parent rows by title
            $parentJoins = [
                'page' => [
                    'LEFT JOIN',
                    [ 'page_title = cat_title', 'page_namespace' => NS_CATEGORY ],
                ],
                'page_props' => [
                    'LEFT JOIN',
                    [ 'pp_propname' => 'hiddencat', 'pp_page = page_id' ],
                ],
            ];
            $parentRows = $dbr->select(
                [ 'category', 'page', 'page_props' ],
                [
                    'page_id',
                    'rc_title' => 'cat_title',
                    'pp_propname',
                    'cat_pages',
                    'cat_subcats',
                    'cat_files',
                ],
                // Array keys that look like integers become ints, hence strval.
                [ 'cat_title' => array_map( 'strval', array_keys( $parentTitles ) ) ],
                __METHOD__,
                [],
                $parentJoins
            );

            foreach ( $parentRows as $parent ) {
                $seenById = $parent->page_id && isset( $this->processed[$parent->page_id] );
                if ( $seenById || isset( $seenTitles[$parent->rc_title] ) ) {
                    // Captured by an earlier handler or batch.
                    continue;
                }
                $this->writeCategoryData( $parent );
                if ( $parent->page_id ) {
                    $pages[$parent->page_id] = $parent->rc_title;
                    $deleteUrls[] = '<' . $this->categoriesRdf->labelToUrl( $parent->rc_title ) . '>';
                    $this->processed[$parent->page_id] = true;
                }
                $seenTitles[$parent->rc_title] = true;
            }
        }

        $sparql = $this->getCategoriesUpdate( $dbr, $deleteUrls, $pages, "Changes" );
        fwrite( $output, $sparql );
    }
}
704 }
705 
// Name the class implementing this maintenance script, then include the file
// referenced by RUN_MAINTENANCE_IF_MAIN (a constant defined in Maintenance.php).
// NOTE(review): presumably that file runs $maintClass when this script is the
// entry point - confirm against Maintenance.php.
706 $maintClass = CategoryChangesAsRdf::class;
707 require_once RUN_MAINTENANCE_IF_MAIN;
RUN_MAINTENANCE_IF_MAIN
const RUN_MAINTENANCE_IF_MAIN
Definition: Maintenance.php:38
CategoryChangesAsRdf\getRdf
getRdf()
Get accumulated RDF.
Definition: categoryChangesAsRdf.php:427
MWTimestamp
Library for creating and parsing MW-style timestamps.
Definition: MWTimestamp.php:34
CategoriesRdf
Helper class to produce RDF representation of categories.
Definition: CategoriesRdf.php:24
CategoryChangesAsRdf\getInsertRdf
getInsertRdf()
Get the text of SPARQL INSERT DATA clause.
Definition: categoryChangesAsRdf.php:159
CategoryChangesAsRdf\writeParentCategories
writeParentCategories(IDatabase $dbr, $pages)
Write parent data for a set of categories.
Definition: categoryChangesAsRdf.php:194
CategoryChangesAsRdf\getCategoryLinksIterator
getCategoryLinksIterator(IDatabase $dbr, array $ids, $fname)
Get iterator for links for categories.
Definition: categoryChangesAsRdf.php:407
Maintenance\addDescription
addDescription( $text)
Set the description text.
Definition: Maintenance.php:327
wfTimestamp
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Definition: GlobalFunctions.php:1815
BatchRowIterator\addConditions
addConditions(array $conditions)
Definition: BatchRowIterator.php:121
RC_LOG
const RC_LOG
Definition: Defines.php:133
BatchRowIterator
Allows iterating a large number of rows in batches transparently.
Definition: BatchRowIterator.php:33
RC_EDIT
const RC_EDIT
Definition: Defines.php:131
CategoryChangesAsRdf\getChangedCatsIterator
getChangedCatsIterator(IDatabase $dbr, $type, $fname)
Fetch categorization changes or edits.
Definition: categoryChangesAsRdf.php:367
Maintenance
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
Definition: Maintenance.php:55
CategoryChangesAsRdf\$processed
int[] $processed
List of processed page IDs, so we don't try to process same thing twice.
Definition: categoryChangesAsRdf.php:75
Wikimedia\Rdbms\IDatabase
Basic database interface for live and lazy-loaded relation database handles.
Definition: IDatabase.php:38
$dbr
$dbr
Definition: testCompression.php:54
CategoryChangesAsRdf\getCategoriesUpdate
getCategoriesUpdate(IDatabase $dbr, $deleteUrls, $pages, $mark)
Get SPARQL for updating set of categories.
Definition: categoryChangesAsRdf.php:175
Maintenance\getConfig
getConfig()
Definition: Maintenance.php:596
CategoryChangesAsRdf\updateTS
updateTS( $timestamp)
Generate SPARQL Update code for updating dump timestamp.
Definition: categoryChangesAsRdf.php:205
CategoryChangesAsRdf\addTimestampConditions
addTimestampConditions(BatchRowIterator $it, IDatabase $dbr)
Add timestamp limits to iterator.
Definition: categoryChangesAsRdf.php:383
Maintenance\addOption
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
Definition: Maintenance.php:245
CategoryChangesAsRdf\execute
execute()
Do the actual work.
Definition: categoryChangesAsRdf.php:100
CategoryChangesAsRdf\$categoriesRdf
CategoriesRdf $categoriesRdf
Categories RDF helper.
Definition: categoryChangesAsRdf.php:65
CategoryChangesAsRdf\handleAdds
handleAdds(IDatabase $dbr, $output)
Definition: categoryChangesAsRdf.php:527
CategoryChangesAsRdf
Maintenance script to provide RDF representation of the recent changes in category tree.
Definition: categoryChangesAsRdf.php:31
CategoryChangesAsRdf\SPARQL_INSERT
const SPARQL_INSERT
Insert query.
Definition: categoryChangesAsRdf.php:35
CategoryChangesAsRdf\handleRestores
handleRestores(IDatabase $dbr, $output)
Definition: categoryChangesAsRdf.php:497
BatchRowIterator\addOptions
addOptions(array $options)
Definition: BatchRowIterator.php:129
CategoryChangesAsRdf\getMovedCatsIterator
getMovedCatsIterator(IDatabase $dbr, $fname)
Fetch moved categories.
Definition: categoryChangesAsRdf.php:290
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
NS_CATEGORY
const NS_CATEGORY
Definition: Defines.php:83
CategoryChangesAsRdf\handleMoves
handleMoves(IDatabase $dbr, $output)
Definition: categoryChangesAsRdf.php:467
CategoryChangesAsRdf\$startTS
$startTS
Definition: categoryChangesAsRdf.php:67
CategoryChangesAsRdf\getDeletedCatsIterator
getDeletedCatsIterator(IDatabase $dbr, $fname)
Fetch deleted categories.
Definition: categoryChangesAsRdf.php:316
CategoryChangesAsRdf\writeCategoryData
writeCategoryData( $row)
Write category data to RDF.
Definition: categoryChangesAsRdf.php:454
CategoryChangesAsRdf\$rdfWriter
RdfWriter $rdfWriter
Definition: categoryChangesAsRdf.php:60
CategoryChangesAsRdf\getRestoredCatsIterator
getRestoredCatsIterator(IDatabase $dbr, $fname)
Fetch restored categories.
Definition: categoryChangesAsRdf.php:345
Maintenance\getDB
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
Definition: Maintenance.php:1366
CategoryChangesAsRdf\$endTS
$endTS
Definition: categoryChangesAsRdf.php:68
$maintClass
$maintClass
Definition: categoryChangesAsRdf.php:706
CategoryChangesAsRdf\SPARQL_DELETE
const SPARQL_DELETE
Delete query.
Definition: categoryChangesAsRdf.php:45
Maintenance\getOption
getOption( $name, $default=null)
Get an option, or return the default.
Definition: Maintenance.php:281
CategoryChangesAsRdf\initialize
initialize()
Initialize external service classes.
Definition: categoryChangesAsRdf.php:94
CategoryChangesAsRdf\handleEdits
handleEdits(IDatabase $dbr, $output)
Handle edits for category texts.
Definition: categoryChangesAsRdf.php:556
CategoryChangesAsRdf\getNewCatsIterator
getNewCatsIterator(IDatabase $dbr, $fname)
Fetch newly created categories.
Definition: categoryChangesAsRdf.php:275
Maintenance\error
error( $err, $die=0)
Throw an error to the user.
Definition: Maintenance.php:463
RC_CATEGORIZE
const RC_CATEGORIZE
Definition: Defines.php:135
CategoryChangesAsRdf\setupChangesIterator
setupChangesIterator(IDatabase $dbr, array $columns=[], array $extra_tables=[], $fname=__METHOD__)
Set up standard iterator for retrieving category changes.
Definition: categoryChangesAsRdf.php:231
CategoryChangesAsRdf\handleCategorization
handleCategorization(IDatabase $dbr, $output)
Handles categorization changes.
Definition: categoryChangesAsRdf.php:587
CategoryChangesAsRdf\__construct
__construct()
Default constructor.
Definition: categoryChangesAsRdf.php:77
CategoryChangesAsRdf\handleDeletes
handleDeletes(IDatabase $dbr, $output)
Handle category deletes.
Definition: categoryChangesAsRdf.php:436
Maintenance\setBatchSize
setBatchSize( $s=0)
Set the batch size.
Definition: Maintenance.php:374
CategoryChangesAsRdf\addIndex
addIndex(BatchRowIterator $it)
Need to force index, somehow on terbium the optimizer chooses wrong one.
Definition: categoryChangesAsRdf.php:394
$type
$type
Definition: testCompression.php:52