MediaWiki master
ApiQueryDuplicateFiles.php
Go to the documentation of this file.
1<?php
25
32
33 private RepoGroup $repoGroup;
34
/**
 * Set up the module and keep a handle on the repository group.
 *
 * @param ApiQuery $query Forwarded unchanged to the parent constructor
 * @param mixed $moduleName Forwarded unchanged to the parent constructor
 * @param RepoGroup $repoGroup Repository group used later for file lookups
 */
public function __construct( ApiQuery $query, $moduleName, RepoGroup $repoGroup ) {
	// 'df' is the third parent argument (presumably the parameter prefix — confirm in the parent class)
	parent::__construct( $query, $moduleName, 'df' );

	$this->repoGroup = $repoGroup;
}
48
/**
 * Run the module in its normal (non-generator) form.
 *
 * Delegates to run() without a result page set, so results are written
 * to the API result rather than collected as titles.
 */
public function execute() {
	$this->run( null );
}
52
/**
 * Report the cache mode for this module's output.
 *
 * @param array $params Request parameters (not consulted)
 * @return string Always 'public'
 */
public function getCacheMode( $params ) {
	$cacheMode = 'public';

	return $cacheMode;
}
56
/**
 * Run the module as a generator.
 *
 * @param mixed $resultPageSet Page set that run() fills with the titles
 *  of the duplicates it finds
 */
public function executeGenerator( $resultPageSet ) {
	$pageSetToFill = $resultPageSet;
	$this->run( $pageSetToFill );
}
60
/**
 * Find the duplicates of every file in the page set.
 *
 * In normal mode each duplicate is attached to its page as a sub-item; in
 * generator mode the duplicates' titles are collected and handed to
 * $resultPageSet. Output stops once 'limit' duplicates were emitted or the
 * result is full, in which case a continue value of the form
 * "<image>|<duplicate>" is set.
 *
 * @param mixed|null $resultPageSet Page set to populate via
 *  populateFromTitles() when running as a generator, or null for normal output
 */
private function run( $resultPageSet = null ) {
	// Validated request parameters; every $params[...] read below relies on this.
	$params = $this->extractRequestParams();

	$namespaces = $this->getPageSet()->getGoodAndMissingTitlesByNamespace();
	if ( empty( $namespaces[NS_FILE] ) ) {
		return;
	}
	$images = $namespaces[NS_FILE];

	$descending = $params['dir'] === 'descending';
	if ( $descending ) {
		// Keys are file names: preserve them so numeric-looking names are not renumbered
		$images = array_reverse( $images, true );
	}

	$fromImage = null;
	$skipUntilThisDup = false;
	if ( isset( $params['continue'] ) ) {
		$cont = $this->parseContinueParamOrDie( $params['continue'], [ 'string', 'string' ] );
		$fromImage = $cont[0];
		$skipUntilThisDup = $cont[1];
		// Filter out any images that come before $fromImage in iteration order.
		// When descending the list is reversed, so "before" means "greater than".
		foreach ( $images as $image => $pageId ) {
			$isBeforeStart = $descending ? $image > $fromImage : $image < $fromImage;
			if ( !$isBeforeStart ) {
				break;
			}
			unset( $images[$image] );
		}
	}

	$filesToFind = array_keys( $images );
	if ( $params['localonly'] ) {
		$files = $this->repoGroup->getLocalRepo()->findFiles( $filesToFind );
	} else {
		$files = $this->repoGroup->findFiles( $filesToFind );
	}

	$fit = true;
	$count = 0;
	$titles = [];

	// Map each existing file's name to its SHA-1 hash
	$sha1s = [];
	foreach ( $files as $file ) {
		$sha1s[$file->getName()] = $file->getSha1();
	}

	// find all files with the hashes, result format is:
	// [ hash => [ dup1, dup2 ], hash1 => ... ]
	$filesToFindBySha1s = array_unique( array_values( $sha1s ) );
	if ( $params['localonly'] ) {
		$filesBySha1s = $this->repoGroup->getLocalRepo()->findBySha1s( $filesToFindBySha1s );
	} else {
		$filesBySha1s = $this->repoGroup->findBySha1s( $filesToFindBySha1s );
	}

	// iterate over $images (not $files) so the continue parameter is handled correctly
	foreach ( $images as $image => $pageId ) {
		// The duplicate offset only belongs to the image named in the continue
		// value; never let it suppress duplicates of any other image.
		if ( $skipUntilThisDup !== false && (string)$image !== $fromImage ) {
			$skipUntilThisDup = false;
		}
		if ( !isset( $sha1s[$image] ) ) {
			continue; // file does not exist
		}
		$sha1 = $sha1s[$image];
		// Tolerate a hash without any lookup result instead of reading an undefined key
		$dupFiles = $filesBySha1s[$sha1] ?? [];
		if ( $descending ) {
			$dupFiles = array_reverse( $dupFiles );
		}
		foreach ( $dupFiles as $dupFile ) {
			$dupName = $dupFile->getName();
			if ( $image == $dupName && $dupFile->isLocal() ) {
				continue; // ignore the local file itself
			}
			if ( $skipUntilThisDup !== false ) {
				// Same direction rule as for the image filter above
				$isBeforeDup = $descending
					? $dupName > $skipUntilThisDup
					: $dupName < $skipUntilThisDup;
				if ( $isBeforeDup ) {
					continue; // skip to pos after the image from continue param
				}
			}
			$skipUntilThisDup = false;
			if ( ++$count > $params['limit'] ) {
				$fit = false; // break outer loop
				// We're one over limit which shows that
				// there are additional images to be had. Stop here...
				$this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
				break;
			}
			if ( $resultPageSet !== null ) {
				$titles[] = $dupFile->getTitle();
			} else {
				$r = [
					'name' => $dupName,
					'timestamp' => wfTimestamp( TS_ISO_8601, $dupFile->getTimestamp() ),
					'shared' => !$dupFile->isLocal(),
				];
				$uploader = $dupFile->getUploader( File::FOR_PUBLIC );
				if ( $uploader ) {
					$r['user'] = $uploader->getName();
				}
				$fit = $this->addPageSubItem( $pageId, $r );
				if ( !$fit ) {
					// Result is full: resume from this very duplicate next time
					$this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
					break;
				}
			}
		}
		if ( !$fit ) {
			break;
		}
	}
	if ( $resultPageSet !== null ) {
		$resultPageSet->populateFromTitles( $titles );
	}
}
171
/**
 * Describe the parameters this module accepts.
 *
 * @return array Map of parameter name to its default value or settings array
 */
public function getAllowedParams() {
	// Result size: defaults to 10, at least 1, capped by the LIMIT_BIG constants
	$limitSettings = [
		ParamValidator::PARAM_DEFAULT => 10,
		ParamValidator::PARAM_TYPE => 'limit',
		IntegerDef::PARAM_MIN => 1,
		IntegerDef::PARAM_MAX => ApiBase::LIMIT_BIG1,
		IntegerDef::PARAM_MAX2 => ApiBase::LIMIT_BIG2
	];

	$continueSettings = [
		ApiBase::PARAM_HELP_MSG => 'api-help-param-continue',
	];

	// Listing direction: one of two fixed values, ascending by default
	$directionSettings = [
		ParamValidator::PARAM_DEFAULT => 'ascending',
		ParamValidator::PARAM_TYPE => [
			'ascending',
			'descending'
		]
	];

	return [
		'limit' => $limitSettings,
		'continue' => $continueSettings,
		'dir' => $directionSettings,
		'localonly' => false,
	];
}
194
/**
 * Provide usage examples for this module.
 *
 * @return string[] Map of example query string to the key of its description message
 */
protected function getExamplesMessages() {
	$examples = [];

	$examples['action=query&titles=File:Albert_Einstein_Head.jpg&prop=duplicatefiles']
		= 'apihelp-query+duplicatefiles-example-simple';
	$examples['action=query&generator=allimages&prop=duplicatefiles']
		= 'apihelp-query+duplicatefiles-example-generated';

	return $examples;
}
203
/**
 * Point to the detailed help page for this module.
 *
 * @return string URL of the help page
 */
public function getHelpUrls() {
	$helpUrl = 'https://www.mediawiki.org/wiki/Special:MyLanguage/API:Duplicatefiles';

	return $helpUrl;
}
207}
const NS_FILE
Definition Defines.php:71
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
array $params
The job parameters.
run()
Run the job.
parseContinueParamOrDie(string $continue, array $types)
Parse the 'continue' parameter in the usual format and validate the types of each part, or die with the 'badcontinue' error if the format, types, or the number of parts is wrong.
Definition ApiBase.php:1731
const LIMIT_BIG1
Fast query, standard limit.
Definition ApiBase.php:237
extractRequestParams( $options=[])
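Using getAllowedParams(), this function makes an array of the values provided by the user, with the key being the name of the variable and the value being the validated value from the user or the default.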
Definition ApiBase.php:821
const PARAM_HELP_MSG
(string|array|Message) Specify an alternative i18n documentation message for this parameter.
Definition ApiBase.php:172
const LIMIT_BIG2
Fast query, apihighlimits limit.
Definition ApiBase.php:239
addPageSubItem( $pageId, $item, $elemname=null)
Same as addPageSubItems(), but one element of $data at a time.
A query module to list duplicates of the given file(s)
execute()
Evaluates the parameters, performs the requested query, and sets up the result.
executeGenerator( $resultPageSet)
Execute this module as a generator.
getAllowedParams()
Returns an array of allowed parameters (parameter name) => (default value) or (parameter name) => (array with PARAM_* constants as keys).
getCacheMode( $params)
Get the cache mode for the data generated by this module.
getExamplesMessages()
Returns usage examples for this module.
getHelpUrls()
Return links to more detailed help pages about the module.
__construct(ApiQuery $query, $moduleName, RepoGroup $repoGroup)
setContinueEnumParameter( $paramName, $paramValue)
Overridden to set the generator param if in generator mode.
getPageSet()
Get the PageSet object to work on.
This is the main query class.
Definition ApiQuery.php:43
Implements some public methods and some protected utility functions which are required by multiple child classes.
Definition File.php:74
getSha1()
Get the SHA-1 base 36 hash of the file.
Definition File.php:2332
Prioritized list of file repositories.
Definition RepoGroup.php:30
Service for formatting and validating API parameters.
Type definition for integer types.