19 use Wikimedia\Purtle\RdfWriter;
20 use Wikimedia\Purtle\TurtleRdfWriter;
23 require_once __DIR__ .
'/Maintenance.php';
95 parent::__construct();
97 $this->
addDescription(
"Generate RDF dump of category changes in a wiki." );
100 $this->
addOption(
'output',
"Output file (default is stdout). Will be overwritten.",
false,
102 $this->
addOption(
'start',
'Starting timestamp (inclusive), in ISO or Mediawiki format.',
104 $this->
addOption(
'end',
'Ending timestamp (exclusive), in ISO or Mediawiki format.',
true,
113 $this->rdfWriter =
new TurtleRdfWriter();
114 $this->categoriesRdf =
new CategoriesRdf( $this->rdfWriter );
123 $rcMaxAge = $this->
getConfig()->get(
'RCMaxAge' );
125 if ( $now->getTimestamp() -
$startTS->getTimestamp() > $rcMaxAge ) {
126 $this->
error(
"Start timestamp too old, maximum RC age is $rcMaxAge!" );
128 if ( $now->getTimestamp() -
$endTS->getTimestamp() > $rcMaxAge ) {
129 $this->
error(
"End timestamp too old, maximum RC age is $rcMaxAge!" );
132 $this->startTS =
$startTS->getTimestamp();
133 $this->endTS =
$endTS->getTimestamp();
135 $outFile = $this->
getOption(
'output',
'php://stdout' );
136 if ( $outFile ===
'-' ) {
137 $outFile =
'php://stdout';
140 $output = fopen( $outFile,
'wb' );
142 $this->categoriesRdf->setupPrefixes();
143 $this->rdfWriter->start();
145 $prefixes = $this->
getRdf();
148 $prefixes = preg_replace( [
'/^@/m',
'/\s*[.]$/m' ],
'', $prefixes );
177 $rdfText = $this->
getRdf();
181 return sprintf( self::SPARQL_INSERT, $rdfText );
193 if ( empty( $deleteUrls ) ) {
197 if ( !empty( $pages ) ) {
201 return "# $mark\n" . sprintf( self::SPARQL_DELETE, implode(
' ', $deleteUrls ) ) .
213 $this->categoriesRdf->writeCategoryLinkData( $pages[$row->cl_from], $row->cl_to );
223 $dumpUrl =
'<' . $this->categoriesRdf->getDumpURI() .
'>';
227 $dumpUrl schema:dateModified ?o .
230 $dumpUrl schema:dateModified ?o .
233 $dumpUrl schema:dateModified
"$ts"^^xsd:dateTime .
250 array $extra_tables = []
252 $tables = [
'recentchanges',
'page_props',
'category' ];
253 if ( $extra_tables ) {
254 $tables = array_merge( $tables, $extra_tables );
262 $it->addJoinConditions(
265 'LEFT JOIN', [
'pp_propname' =>
'hiddencat',
'pp_page = rc_cur_id' ]
268 'LEFT JOIN', [
'cat_title = rc_title' ]
272 $it->setFetchColumns( array_merge( $columns, [
290 $it->addConditions( [
304 $it->addConditions( [
307 'rc_log_type' =>
'move',
310 $it->addJoinConditions( [
311 'page' => [
'JOIN',
'rc_cur_id = page_id' ],
329 $it->addConditions( [
332 'rc_log_type' =>
'delete',
333 'rc_log_action' =>
'delete',
337 'NOT EXISTS (SELECT * FROM page WHERE page_id = rc_cur_id)',
340 $it->setFetchColumns( [
'rc_cur_id',
'rc_title' ] );
351 $it->addConditions( [
354 'rc_log_type' =>
'delete',
355 'rc_log_action' =>
'restore',
358 'EXISTS (SELECT page_id FROM page WHERE page_id = rc_cur_id)',
372 $it->addConditions( [
388 'rc_timestamp >= ' .
$dbr->addQuotes(
$dbr->timestamp( $this->startTS ) ),
389 'rc_timestamp < ' .
$dbr->addQuotes(
$dbr->timestamp( $this->endTS ) ),
399 'USE INDEX' => [
'recentchanges' =>
'new_name_timestamp' ]
413 [
'cl_from',
'cl_to' ],
416 $it->addConditions( [
417 'cl_type' =>
'subcat',
420 $it->setFetchColumns( [
'cl_from',
'cl_to' ] );
421 return new RecursiveIteratorIterator( $it );
429 return $this->rdfWriter->drain();
441 foreach ( $batch as $row ) {
443 $deleteUrls[] =
'<' . $this->categoriesRdf->labelToUrl( $row->rc_title ) .
'>';
444 $this->processed[$row->rc_cur_id] =
true;
455 $this->categoriesRdf->writeCategoryData(
457 $row->pp_propname ===
'hiddencat',
458 (
int)$row->cat_pages - (
int)$row->cat_subcats - (
int)$row->cat_files,
459 (
int)$row->cat_subcats
471 foreach ( $batch as $row ) {
472 $deleteUrls[] =
'<' . $this->categoriesRdf->labelToUrl( $row->rc_title ) .
'>';
474 if ( isset( $this->processed[$row->rc_cur_id] ) ) {
483 $row->rc_title = $row->page_title;
485 $pages[$row->rc_cur_id] = $row->page_title;
486 $this->processed[$row->rc_cur_id] =
true;
498 fwrite(
$output,
"# Restores\n" );
502 foreach ( $batch as $row ) {
503 if ( isset( $this->processed[$row->rc_cur_id] ) ) {
508 $pages[$row->rc_cur_id] = $row->rc_title;
509 $this->processed[$row->rc_cur_id] =
true;
512 if ( empty( $pages ) ) {
527 fwrite(
$output,
"# Additions\n" );
530 foreach ( $batch as $row ) {
531 if ( isset( $this->processed[$row->rc_cur_id] ) ) {
536 $pages[$row->rc_cur_id] = $row->rc_title;
537 $this->processed[$row->rc_cur_id] =
true;
540 if ( empty( $pages ) ) {
562 foreach ( $batch as $row ) {
565 if ( isset( $this->processed[$row->rc_cur_id] ) ) {
570 $pages[$row->rc_cur_id] = $row->rc_title;
571 $this->processed[$row->rc_cur_id] =
true;
572 $deleteUrls[] =
'<' . $this->categoriesRdf->labelToUrl( $row->rc_title ) .
'>';
585 $processedTitle = [];
599 foreach ( $batch as $row ) {
600 $childPages[$row->rc_cur_id] =
true;
601 $parentCats[$row->rc_title] =
true;
607 [
'pp_propname' =>
'hiddencat',
'pp_page = page_id' ],
611 [
'cat_title = page_title' ],
620 $childRows =
$dbr->select(
621 [
'page',
'page_props',
'category' ],
624 'rc_title' =>
'page_title',
630 [
'page_namespace' =>
NS_CATEGORY,
'page_id' => array_keys( $childPages ) ],
635 foreach ( $childRows as $row ) {
636 if ( isset( $this->processed[$row->page_id] ) ) {
641 $deleteUrls[] =
'<' . $this->categoriesRdf->labelToUrl( $row->rc_title ) .
'>';
642 $this->processed[$row->page_id] =
true;
651 [
'page_title = cat_title',
'page_namespace' =>
NS_CATEGORY ],
655 [
'pp_propname' =>
'hiddencat',
'pp_page = page_id' ],
660 [
'category',
'page',
'page_props' ],
663 'rc_title' =>
'cat_title',
669 [
'cat_title' => array_keys( $parentCats ) ],
674 foreach ( $parentRows as $row ) {
675 if ( $row->page_id && isset( $this->processed[$row->page_id] ) ) {
679 if ( isset( $processedTitle[$row->rc_title] ) ) {
684 $deleteUrls[] =
'<' . $this->categoriesRdf->labelToUrl( $row->rc_title ) .
'>';
685 if ( $row->page_id ) {
686 $this->processed[$row->page_id] =
true;
688 $processedTitle[$row->rc_title] =
true;