MediaWiki master
ApiQueryDuplicateFiles.php
Go to the documentation of this file.
1<?php
9namespace MediaWiki\Api;
10
15use Wikimedia\Timestamp\TimestampFormat as TS;
16
23
24 private RepoGroup $repoGroup;
25
/**
 * Set up the module under the "df" parameter prefix and keep the repo
 * group that is later used to look up files and their duplicates.
 *
 * @param ApiQuery $query Query object this module is attached to
 * @param string $moduleName Name under which the module was registered
 * @param RepoGroup $repoGroup File repositories to search
 */
public function __construct(
	ApiQuery $query,
	string $moduleName,
	RepoGroup $repoGroup
) {
	// 'df' is the prefix of every request parameter of this module.
	parent::__construct( $query, $moduleName, 'df' );

	$this->repoGroup = $repoGroup;
}
34
/**
 * Run the module in normal (non-generator) mode: no page set is handed
 * to run(), so duplicates are attached to the API result per page.
 */
public function execute() {
	// Passing null explicitly is the same as relying on run()'s default.
	$this->run( null );
}
38
/**
 * Report the cache mode for this module's output.
 *
 * @param array $params Request parameters (not consulted here)
 * @return string Always 'public'
 */
public function getCacheMode( $params ) {
	return 'public';
}
43
/**
 * Run the module as a generator: the duplicates that are found are fed
 * into the given page set instead of being written to the result.
 *
 * @param object $resultPageSet Page set to populate with the duplicates' titles
 */
public function executeGenerator( $resultPageSet ) {
	$this->run( $resultPageSet );
}
48
/**
 * Find the duplicates (files with the same SHA-1 hash) of every file in
 * the current page set and either add them to the API result or, in
 * generator mode, collect their titles into $resultPageSet.
 *
 * Continuation uses a "<image>|<duplicate>" pair: processing resumes at
 * that image and, within it, at that duplicate. All ordering checks are
 * made relative to the requested direction so that continuation behaves
 * the same way for 'ascending' and 'descending'.
 *
 * @param object|null $resultPageSet Page set to populate when running as
 *  a generator (must offer populateFromTitles()); null in normal mode
 */
private function run( $resultPageSet = null ) {
	$params = $this->extractRequestParams();
	$namespaces = $this->getPageSet()->getGoodAndMissingTitlesByNamespace();
	if ( empty( $namespaces[NS_FILE] ) ) {
		// No file pages were requested, nothing to do.
		return;
	}
	// Map of file name => page id.
	$images = $namespaces[NS_FILE];

	$descending = $params['dir'] === 'descending';
	// Multiplying a strcmp() result by $order makes "negative" always mean
	// "comes earlier in iteration order", whatever the direction is.
	$order = $descending ? -1 : 1;

	if ( $descending ) {
		// Keep the keys: PHP stores integer-like names ("2024") as integer
		// keys, and array_reverse() would otherwise renumber them, losing
		// the file name.
		$images = array_reverse( $images, true );
	}

	$skipUntilThisDup = false;
	if ( isset( $params['continue'] ) ) {
		$cont = $this->parseContinueParamOrDie( $params['continue'], [ 'string', 'string' ] );
		$fromImage = $cont[0];
		$skipUntilThisDup = $cont[1];
		// Drop every image that precedes $fromImage in iteration order.
		// NOTE(review): like the duplicate skip below, this assumes the
		// names arrive sorted - confirm against the page set.
		foreach ( $images as $image => $pageId ) {
			if ( $order * strcmp( (string)$image, $fromImage ) < 0 ) {
				unset( $images[$image] );
			} else {
				break;
			}
		}
	}

	$filesToFind = array_keys( $images );
	if ( $params['localonly'] ) {
		$files = $this->repoGroup->getLocalRepo()->findFiles( $filesToFind );
	} else {
		$files = $this->repoGroup->findFiles( $filesToFind );
	}

	$fit = true;
	$count = 0;
	$titles = [];

	// File name => SHA-1 hash, for the requested files that exist.
	$sha1s = [];
	foreach ( $files as $file ) {
		$sha1s[$file->getName()] = $file->getSha1();
	}

	// find all files with the hashes, result format is:
	// [ hash => [ dup1, dup2 ], hash1 => ... ]
	$filesToFindBySha1s = array_unique( array_values( $sha1s ) );
	if ( $params['localonly'] ) {
		$filesBySha1s = $this->repoGroup->getLocalRepo()->findBySha1s( $filesToFindBySha1s );
	} else {
		$filesBySha1s = $this->repoGroup->findBySha1s( $filesToFindBySha1s );
	}

	// Iterate over $images (not $files) so the continue parameter is
	// handled correctly.
	foreach ( $images as $image => $pageId ) {
		if ( !isset( $sha1s[$image] ) ) {
			continue; // file does not exist
		}
		$sha1 = $sha1s[$image];
		// Tolerate a missing bucket instead of raising an
		// "undefined array key" warning.
		$dupFiles = $filesBySha1s[$sha1] ?? [];
		if ( $descending ) {
			$dupFiles = array_reverse( $dupFiles );
		}
		foreach ( $dupFiles as $dupFile ) {
			$dupName = $dupFile->getName();
			// Strict string comparison: a loose == would treat
			// numeric-looking names such as "1e3" and "1000" as equal.
			if ( (string)$image === $dupName && $dupFile->isLocal() ) {
				continue; // ignore the local file itself
			}
			if (
				$skipUntilThisDup !== false &&
				$order * strcmp( $dupName, $skipUntilThisDup ) < 0
			) {
				continue; // skip to pos after the image from continue param
			}
			// The continuation point has been reached (or passed); from
			// here on nothing is skipped any more.
			$skipUntilThisDup = false;
			if ( ++$count > $params['limit'] ) {
				$fit = false; // break outer loop
				// We're one over limit which shows that
				// there are additional images to be had. Stop here...
				$this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
				break;
			}
			if ( $resultPageSet !== null ) {
				$titles[] = $dupFile->getTitle();
			} else {
				$r = [
					'name' => $dupName,
					'timestamp' => wfTimestamp( TS::ISO_8601, $dupFile->getTimestamp() ),
					'shared' => !$dupFile->isLocal(),
				];
				$uploader = $dupFile->getUploader( File::FOR_PUBLIC );
				if ( $uploader ) {
					$r['user'] = $uploader->getName();
				}
				$fit = $this->addPageSubItem( $pageId, $r );
				if ( !$fit ) {
					// The result is full: resume with this duplicate.
					$this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
					break;
				}
			}
		}
		if ( !$fit ) {
			break;
		}
	}
	if ( $resultPageSet !== null ) {
		$resultPageSet->populateFromTitles( $titles );
	}
}
159
/**
 * Describe the request parameters this module accepts.
 *
 * @return array Map of parameter name => default value or settings array
 */
public function getAllowedParams() {
	// Maximum number of duplicates to return.
	$limit = [
		ParamValidator::PARAM_DEFAULT => 10,
		ParamValidator::PARAM_TYPE => 'limit',
		IntegerDef::PARAM_MIN => 1,
		IntegerDef::PARAM_MAX => ApiBase::LIMIT_BIG1,
		IntegerDef::PARAM_MAX2 => ApiBase::LIMIT_BIG2
	];

	// Continuation token; uses the shared help message.
	$continue = [
		ApiBase::PARAM_HELP_MSG => 'api-help-param-continue',
	];

	// Listing direction.
	$dir = [
		ParamValidator::PARAM_DEFAULT => 'ascending',
		ParamValidator::PARAM_TYPE => [
			'ascending',
			'descending'
		]
	];

	return [
		'limit' => $limit,
		'continue' => $continue,
		'dir' => $dir,
		// When set, only the local repository is searched.
		'localonly' => false,
	];
}
183
/**
 * Provide usage examples for this module.
 *
 * @return array Map of example query string => help message key
 */
protected function getExamplesMessages() {
	$examples = [];

	// Duplicates of a single, explicitly named file.
	$examples['action=query&titles=File:Albert_Einstein_Head.jpg&prop=duplicatefiles']
		= 'apihelp-query+duplicatefiles-example-simple';

	// Duplicates of the files produced by the allimages generator.
	$examples['action=query&generator=allimages&prop=duplicatefiles']
		= 'apihelp-query+duplicatefiles-example-generated';

	return $examples;
}
193
/**
 * Point to the on-wiki documentation of this module.
 *
 * @return string URL of the help page
 */
public function getHelpUrls() {
	$helpPage = 'https://www.mediawiki.org/wiki/Special:MyLanguage/API:Duplicatefiles';

	return $helpPage;
}
198}
199
// Backwards-compatibility alias: also expose the namespaced class under
// its unnamespaced global name.
class_alias( ApiQueryDuplicateFiles::class, 'ApiQueryDuplicateFiles' );
const NS_FILE
Definition Defines.php:57
wfTimestamp( $outputtype=TS::UNIX, $ts=0)
Get a timestamp string in one of various formats.
parseContinueParamOrDie(string $continue, array $types)
Parse the 'continue' parameter in the usual format and validate the types of each part,...
Definition ApiBase.php:1696
const PARAM_HELP_MSG
(string|array|Message) Specify an alternative i18n documentation message for this parameter.
Definition ApiBase.php:167
const LIMIT_BIG2
Fast query, apihighlimits limit.
Definition ApiBase.php:234
extractRequestParams( $options=[])
Using getAllowedParams(), this function makes an array of the values provided by the user,...
Definition ApiBase.php:823
const LIMIT_BIG1
Fast query, standard limit.
Definition ApiBase.php:232
addPageSubItem( $pageId, $item, $elemname=null)
Same as addPageSubItems(), but one element of $data at a time.
A query module to list duplicates of the given file(s)
getExamplesMessages()
Returns usage examples for this module. Return value has query strings as keys, with values being eith...
execute()
Evaluates the parameters, performs the requested query, and sets up the result.
executeGenerator( $resultPageSet)
Execute this module as a generator.
__construct(ApiQuery $query, string $moduleName, RepoGroup $repoGroup)
getHelpUrls()
Return links to more detailed help pages about the module. Since 1.25, returning boolean false is deprecated...
getAllowedParams()
Returns an array of allowed parameters (parameter name) => (default value) or (parameter name) => (ar...
getCacheMode( $params)
Get the cache mode for the data generated by this module. Override this in the module subclass....
setContinueEnumParameter( $paramName, $paramValue)
Overridden to set the generator param if in generator mode.
getPageSet()
Get the PageSet object to work on.
This is the main query class.
Definition ApiQuery.php:36
Implements some public methods and some protected utility functions which are required by multiple ch...
Definition File.php:79
Prioritized list of file repositories.
Definition RepoGroup.php:30
Service for formatting and validating API parameters.
Type definition for integer types.