19use Wikimedia\Purtle\RdfWriter;
20use Wikimedia\Purtle\RdfWriterFactory;
23require_once __DIR__ .
'/Maintenance.php';
43 parent::__construct();
45 $this->
addDescription(
"Generate RDF dump of categories in a wiki." );
48 $this->
addOption(
'output',
"Output file (default is stdout). Will be overwritten.",
50 $this->
addOption(
'format',
"Set the dump format.",
false,
true );
62 [
'page',
'page_props',
'category' ],
69 $it->setFetchColumns( [
77 $it->addJoinConditions(
80 'LEFT JOIN', [
'pp_propname' =>
'hiddencat',
'pp_page = page_id' ]
83 'LEFT JOIN', [
'cat_title = page_title' ]
88 $it->setCaller( $fname );
103 [
'cl_from',
'cl_to' ],
106 $it->addConditions( [
107 'cl_type' =>
'subcat',
110 $it->setFetchColumns( [
'cl_from',
'cl_to' ] );
111 $it->setCaller( $fname );
112 return new RecursiveIteratorIterator( $it );
121 if ( substr( $licenseUrl, 0, 2 ) ==
'//' ) {
122 $licenseUrl =
'https:' . $licenseUrl;
124 $this->rdfWriter->about( $this->categoriesRdf->getDumpURI() )
125 ->a(
'schema',
'Dataset' )
126 ->a(
'owl',
'Ontology' )
127 ->say(
'cc',
'license' )->is( $licenseUrl )
129 ->say(
'schema',
'dateModified' )
130 ->value(
wfTimestamp( TS_ISO_8601, $timestamp ),
'xsd',
'dateTime' )
136 $outFile = $this->
getOption(
'output',
'php://stdout' );
138 if ( $outFile ===
'-' ) {
139 $outFile =
'php://stdout';
142 $output = fopen( $outFile,
'w' );
144 $this->categoriesRdf =
new CategoriesRdf( $this->rdfWriter );
146 $this->categoriesRdf->setupPrefixes();
147 $this->rdfWriter->start();
150 fwrite( $output, $this->rdfWriter->drain() );
156 foreach ( $batch as $row ) {
157 $this->categoriesRdf->writeCategoryData(
159 $row->pp_propname ===
'hiddencat',
160 (
int)$row->cat_pages - (
int)$row->cat_subcats - (
int)$row->cat_files,
161 (
int)$row->cat_subcats
163 if ( $row->page_id ) {
164 $pages[$row->page_id] = $row->page_title;
169 $this->categoriesRdf->writeCategoryLinkData( $pages[$row->cl_from], $row->cl_to );
171 fwrite( $output, $this->rdfWriter->drain() );
174 if ( $outFile !==
'-' ) {
184 $factory =
new RdfWriterFactory();
185 return $factory->getWriter( $factory->getFormatName( $format ) );
190require_once RUN_MAINTENANCE_IF_MAIN;
$wgRightsUrl
Set this to specify an external URL containing details about the content license used on your wiki.
wfExpandUrl( $url, $defaultProto=PROTO_CURRENT)
Expand a potentially local URL to a fully-qualified URL.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Allows iterating a large number of rows in batches transparently.
Helper class to produce RDF representation of categories.
const FORMAT_VERSION
Current version of the dump format.
const OWL_URL
OWL description of the ontology.
Maintenance script to provide RDF representation of the category tree.
getCategoryLinksIterator(IDatabase $dbr, array $ids, $fname)
Get iterator for links for categories.
getCategoryIterator(IDatabase $dbr, $fname)
Produce row iterator for categories.
execute()
Do the actual work.
addDumpHeader( $timestamp)
CategoriesRdf $categoriesRdf
Categories RDF helper.
__construct()
Default constructor.
createRdfWriter( $format)
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.