21use Wikimedia\Purtle\RdfWriter;
22use Wikimedia\Purtle\RdfWriterFactory;
25require_once __DIR__ .
'/Maintenance.php';
42 private $categoriesRdf;
45 parent::__construct();
47 $this->
addDescription(
"Generate RDF dump of categories in a wiki." );
50 $this->
addOption(
'output',
"Output file (default is stdout). Will be overwritten.",
52 $this->
addOption(
'format',
"Set the dump format.",
false,
true );
64 [
'page',
'page_props',
'category' ],
71 $it->setFetchColumns( [
79 $it->addJoinConditions(
82 'LEFT JOIN', [
'pp_propname' =>
'hiddencat',
'pp_page = page_id' ]
85 'LEFT JOIN', [
'cat_title = page_title' ]
90 $it->setCaller( $fname );
105 [
'cl_from',
'cl_to' ],
108 $it->addConditions( [
109 'cl_type' =>
'subcat',
112 $it->setFetchColumns( [
'cl_from',
'cl_to' ] );
113 $it->setCaller( $fname );
114 return new RecursiveIteratorIterator( $it );
121 $licenseUrl = $this->
getConfig()->get( MainConfigNames::RightsUrl );
122 if ( substr( $licenseUrl, 0, 2 ) ==
'//' ) {
123 $licenseUrl =
'https:' . $licenseUrl;
125 $this->rdfWriter->about( $this->categoriesRdf->getDumpURI() )
126 ->a(
'schema',
'Dataset' )
127 ->a(
'owl',
'Ontology' )
128 ->say(
'cc',
'license' )->is( $licenseUrl )
130 ->say(
'schema',
'dateModified' )
131 ->value(
wfTimestamp( TS_ISO_8601, $timestamp ),
'xsd',
'dateTime' )
137 $outFile = $this->
getOption(
'output',
'php://stdout' );
139 if ( $outFile ===
'-' ) {
140 $outFile =
'php://stdout';
143 $output = fopen( $outFile,
'w' );
144 $this->rdfWriter = $this->createRdfWriter( $this->
getOption(
'format',
'ttl' ) );
145 $this->categoriesRdf =
new CategoriesRdf( $this->rdfWriter );
147 $this->categoriesRdf->setupPrefixes();
148 $this->rdfWriter->start();
151 fwrite( $output, $this->rdfWriter->drain() );
157 foreach ( $batch as $row ) {
158 $this->categoriesRdf->writeCategoryData(
160 $row->pp_propname ===
'hiddencat',
161 (
int)$row->cat_pages - (
int)$row->cat_subcats - (
int)$row->cat_files,
162 (
int)$row->cat_subcats
164 if ( $row->page_id ) {
165 $pages[$row->page_id] = $row->page_title;
170 $this->categoriesRdf->writeCategoryLinkData( $pages[$row->cl_from], $row->cl_to );
172 fwrite( $output, $this->rdfWriter->drain() );
175 if ( $outFile !==
'-' ) {
184 private function createRdfWriter( $format ) {
185 $factory =
new RdfWriterFactory();
186 return $factory->getWriter( $factory->getFormatName( $format ) );
191require_once RUN_MAINTENANCE_IF_MAIN;
wfExpandUrl( $url, $defaultProto=PROTO_CURRENT)
Expand a potentially local URL to a fully-qualified URL.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Allows iterating a large number of rows in batches transparently.
Helper class to produce RDF representation of categories.
const FORMAT_VERSION
Current version of the dump format.
const OWL_URL
OWL description of the ontology.
Maintenance script to provide RDF representation of the category tree.
getCategoryLinksIterator(IDatabase $dbr, array $ids, $fname)
Get iterator for links for categories.
getCategoryIterator(IDatabase $dbr, $fname)
Produce row iterator for categories.
execute()
Do the actual work.
addDumpHeader( $timestamp)
__construct()
Default constructor.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
A class containing constants representing the names of configuration variables.