19use Wikimedia\Purtle\RdfWriter;
20use Wikimedia\Purtle\RdfWriterFactory;
23require_once __DIR__ .
'/Maintenance.php';
43 parent::__construct();
45 $this->
addDescription(
"Generate RDF dump of categories in a wiki." );
48 $this->
addOption(
'output',
"Output file (default is stdout). Will be overwritten.",
50 $this->
addOption(
'format',
"Set the dump format.",
false,
true );
61 [
'page',
'page_props',
'category' ],
68 $it->setFetchColumns( [
76 $it->addJoinConditions(
79 'LEFT JOIN', [
'pp_propname' =>
'hiddencat',
'pp_page = page_id' ]
82 'LEFT JOIN', [
'cat_title = page_title' ]
100 [
'cl_from',
'cl_to' ],
103 $it->addConditions( [
104 'cl_type' =>
'subcat',
107 $it->setFetchColumns( [
'cl_from',
'cl_to' ] );
108 return new RecursiveIteratorIterator( $it );
117 if ( substr( $licenseUrl, 0, 2 ) ==
'//' ) {
118 $licenseUrl =
'https:' . $licenseUrl;
120 $this->rdfWriter->about( $this->categoriesRdf->getDumpURI() )
121 ->a(
'schema',
'Dataset' )
122 ->a(
'owl',
'Ontology' )
123 ->say(
'cc',
'license' )->is( $licenseUrl )
125 ->say(
'schema',
'dateModified' )
126 ->value(
wfTimestamp( TS_ISO_8601, $timestamp ),
'xsd',
'dateTime' )
132 $outFile = $this->
getOption(
'output',
'php://stdout' );
134 if ( $outFile ===
'-' ) {
135 $outFile =
'php://stdout';
138 $output = fopen( $outFile,
'w' );
140 $this->categoriesRdf =
new CategoriesRdf( $this->rdfWriter );
142 $this->categoriesRdf->setupPrefixes();
143 $this->rdfWriter->start();
146 fwrite( $output, $this->rdfWriter->drain() );
152 foreach ( $batch as $row ) {
153 $this->categoriesRdf->writeCategoryData(
155 $row->pp_propname ===
'hiddencat',
156 (
int)$row->cat_pages - (
int)$row->cat_subcats - (
int)$row->cat_files,
157 (
int)$row->cat_subcats
159 if ( $row->page_id ) {
160 $pages[$row->page_id] = $row->page_title;
165 $this->categoriesRdf->writeCategoryLinkData( $pages[$row->cl_from], $row->cl_to );
167 fwrite( $output, $this->rdfWriter->drain() );
170 if ( $outFile !==
'-' ) {
180 $factory =
new RdfWriterFactory();
181 return $factory->getWriter( $factory->getFormatName( $format ) );
$wgRightsUrl
Set this to specify an external URL containing details about the content license used on your wiki.
wfExpandUrl( $url, $defaultProto=PROTO_CURRENT)
Expand a potentially local URL to a fully-qualified URL.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
const RUN_MAINTENANCE_IF_MAIN
Allows iterating a large number of rows in batches transparently.
Helper class to produce RDF representation of categories.
const FORMAT_VERSION
Current version of the dump format.
const OWL_URL
OWL description of the ontology.
Maintenance script to provide RDF representation of the category tree.
getCategoryIterator(IDatabase $dbr)
Produce row iterator for categories.
getCategoryLinksIterator(IDatabase $dbr, array $ids)
Get iterator for links for categories.
execute()
Do the actual work.
addDumpHeader( $timestamp)
CategoriesRdf $categoriesRdf
Categories RDF helper.
__construct()
Default constructor.
createRdfWriter( $format)
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
Set the batch size.