19use Wikimedia\Purtle\RdfWriter;
20use Wikimedia\Purtle\RdfWriterFactory;
23require_once __DIR__ .
'/Maintenance.php';
43 parent::__construct();
45 $this->
addDescription(
"Generate RDF dump of categories in a wiki." );
48 $this->
addOption(
'output',
"Output file (default is stdout). Will be overwritten.",
50 $this->
addOption(
'format',
"Set the dump format.",
false,
true );
61 [
'page',
'page_props',
'category' ],
68 $it->setFetchColumns( [
76 $it->addJoinConditions(
79 'LEFT JOIN', [
'pp_propname' =>
'hiddencat',
'pp_page = page_id' ]
82 'LEFT JOIN', [
'cat_title = page_title' ]
100 [
'cl_from',
'cl_to' ],
103 $it->addConditions( [
104 'cl_type' =>
'subcat',
107 $it->setFetchColumns( [
'cl_from',
'cl_to' ] );
108 return new RecursiveIteratorIterator( $it );
117 if ( substr( $licenseUrl, 0, 2 ) ==
'//' ) {
118 $licenseUrl =
'https:' . $licenseUrl;
120 $this->rdfWriter->about( $this->categoriesRdf->getDumpURI() )
121 ->a(
'schema',
'Dataset' )
122 ->a(
'owl',
'Ontology' )
123 ->say(
'cc',
'license' )->is( $licenseUrl )
125 ->say(
'schema',
'dateModified' )
126 ->value(
wfTimestamp( TS_ISO_8601, $timestamp ),
'xsd',
'dateTime' )
132 $outFile = $this->
getOption(
'output',
'php://stdout' );
134 if ( $outFile ===
'-' ) {
135 $outFile =
'php://stdout';
138 $output = fopen( $outFile,
'w' );
140 $this->categoriesRdf =
new CategoriesRdf( $this->rdfWriter );
142 $this->categoriesRdf->setupPrefixes();
143 $this->rdfWriter->start();
146 fwrite(
$output, $this->rdfWriter->drain() );
152 foreach (
$batch as $row ) {
153 $this->categoriesRdf->writeCategoryData(
155 $row->pp_propname ===
'hiddencat',
156 (
int)$row->cat_pages - (
int)$row->cat_subcats - (
int)$row->cat_files,
157 (
int)$row->cat_subcats
159 if ( $row->page_id ) {
160 $pages[$row->page_id] = $row->page_title;
165 $this->categoriesRdf->writeCategoryLinkData( $pages[$row->cl_from], $row->cl_to );
167 fwrite(
$output, $this->rdfWriter->drain() );
170 if ( $outFile !==
'-' ) {
180 $factory =
new RdfWriterFactory();
181 return $factory->getWriter( $factory->getFormatName( $format ) );
$wgRightsUrl
Set this to specify an external URL containing details about the content license used on your wiki.
wfExpandUrl( $url, $defaultProto=PROTO_CURRENT)
Expand a potentially local URL to a fully-qualified URL.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Helper class to produce RDF representation of categories.
const FORMAT_VERSION
Current version of the dump format.
const OWL_URL
OWL description of the ontology.
Maintenance script to provide RDF representation of the category tree.
getCategoryIterator(IDatabase $dbr)
Produce row iterator for categories.
getCategoryLinksIterator(IDatabase $dbr, array $ids)
Get iterator for links for categories.
execute()
Do the actual work.
addDumpHeader( $timestamp)
CategoriesRdf $categoriesRdf
Categories RDF helper.
__construct()
Default constructor.
createRdfWriter( $format)
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
getDB( $db, $groups=[], $wiki=false)
Returns a database to be used by current maintenance script.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
Set the batch size.
Static configuration should be added through ResourceLoaderGetConfigVars instead. Can be used to get the real title after the basic globals have been set but before ordinary actions take place. $output
require_once RUN_MAINTENANCE_IF_MAIN