9use Wikimedia\Purtle\RdfWriter;
10use Wikimedia\Purtle\RdfWriterFactory;
12use Wikimedia\Timestamp\TimestampFormat as TS;
15require_once __DIR__ .
'/Maintenance.php';
33 private $categoriesRdf;
36 parent::__construct();
38 $this->
addDescription(
"Generate RDF dump of categories in a wiki." );
41 $this->
addOption(
'output',
"Output file (default is stdout). Will be overwritten.",
43 $this->
addOption(
'format',
"Set the dump format.",
false,
true );
57 ->leftJoin(
'page_props',
null, [
'pp_propname' =>
'hiddencat',
'pp_page = page_id' ] )
58 ->leftJoin(
'category',
null, [
'cat_title = page_title' ] )
86 ->select( [
'cl_from',
'lt_title' ] )
87 ->from(
'categorylinks' )
88 ->join(
'linktarget',
null,
'cl_target_id=lt_id' )
90 'cl_type' =>
'subcat',
94 $primaryKey = [
'cl_from',
'cl_target_id' ];
102 return new RecursiveIteratorIterator( $it );
109 $licenseUrl = $this->
getConfig()->get( MainConfigNames::RightsUrl );
110 if ( str_starts_with( $licenseUrl,
'//' ) ) {
111 $licenseUrl =
'https:' . $licenseUrl;
114 $this->rdfWriter->about( $this->categoriesRdf->getDumpURI() )
115 ->a(
'schema',
'Dataset' )
116 ->a(
'owl',
'Ontology' )
117 ->say(
'cc',
'license' )->is( $licenseUrl )
118 ->say(
'schema',
'softwareVersion' )->value( CategoriesRdf::FORMAT_VERSION )
119 ->say(
'schema',
'dateModified' )
120 ->value(
wfTimestamp( TS::ISO_8601, $timestamp ),
'xsd',
'dateTime' )
121 ->say(
'schema',
'isPartOf' )->is( (
string)$urlUtils->expand(
'/',
PROTO_CANONICAL ) )
122 ->say(
'owl',
'imports' )->is( CategoriesRdf::OWL_URL );
126 $outFile = $this->
getOption(
'output',
'php://stdout' );
128 if ( $outFile ===
'-' ) {
129 $outFile =
'php://stdout';
132 $output = fopen( $outFile,
'w' );
133 $this->rdfWriter = $this->createRdfWriter( $this->
getOption(
'format',
'ttl' ) );
134 $this->categoriesRdf =
new CategoriesRdf( $this->rdfWriter );
136 $this->categoriesRdf->setupPrefixes();
137 $this->rdfWriter->start();
140 fwrite( $output, $this->rdfWriter->drain() );
146 foreach ( $batch as $row ) {
147 $this->categoriesRdf->writeCategoryData(
149 $row->pp_propname ===
'hiddencat',
150 (
int)$row->cat_pages - (
int)$row->cat_subcats - (
int)$row->cat_files,
151 (
int)$row->cat_subcats
153 if ( $row->page_id ) {
154 $pages[$row->page_id] = $row->page_title;
159 $this->categoriesRdf->writeCategoryLinkData( $pages[$row->cl_from], $row->lt_title );
161 fwrite( $output, $this->rdfWriter->drain() );
164 if ( $outFile !==
'-' ) {
173 private function createRdfWriter( $format ) {
174 $factory =
new RdfWriterFactory();
175 return $factory->getWriter( $factory->getFormatName( $format ) );
181require_once RUN_MAINTENANCE_IF_MAIN;
wfTimestamp( $outputtype=TS::UNIX, $ts=0)
Get a timestamp string in one of various formats.
Allows iterating a large number of rows in batches transparently.
Maintenance script to provide RDF representation of the category tree.
getCategoryLinksIterator(IReadableDatabase $dbr, array $ids, $fname)
Get iterator for links for categories.
execute()
Do the actual work.
addDumpHeader( $timestamp)
getCategoryIterator(IReadableDatabase $dbr, $fname)
Produce row iterator for categories.
__construct()
Default constructor.
A class containing constants representing the names of configuration variables.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
getBatchSize()
Returns batch size.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
getOption( $name, $default=null)
Get an option, or return the default.
getServiceContainer()
Returns the main service container.
addDescription( $text)
Set the description text.