MediaWiki REL1_37
ApiQueryDuplicateFiles.php
Go to the documentation of this file.
1<?php
29
/** @var RepoGroup File repositories used to look up files by name and by SHA-1 hash */
31 private $repoGroup;
32
/**
 * Create the module and remember the repository group it searches.
 *
 * @param ApiQuery $query Query object this module is attached to
 * @param string $moduleName Name this module is registered under
 * @param RepoGroup $repoGroup File repositories searched for duplicates
 */
public function __construct(
	ApiQuery $query,
	$moduleName,
	RepoGroup $repoGroup
) {
	parent::__construct( $query, $moduleName, 'df' );
	// Without the $repoGroup parameter this assignment would read an
	// undefined variable and every later repository lookup would fail.
	$this->repoGroup = $repoGroup;
}
46
/**
 * Run the module as a plain property query: no result page set is
 * supplied, so duplicates are attached to the API result.
 */
public function execute() {
	$this->run( null );
}
50
/**
 * Report the cache mode for the data produced by this module.
 *
 * @param mixed $params Not consulted by this implementation
 * @return string Always 'public'
 */
public function getCacheMode( $params ) {
	$cacheMode = 'public';

	return $cacheMode;
}
54
/**
 * Run the module as a generator: the duplicates found are handed to the
 * given page set instead of being written to the API result.
 *
 * @param object $resultPageSet Page set whose populateFromTitles() receives
 *  the titles of the duplicate files
 */
public function executeGenerator( $resultPageSet ) {
	$this->run( $resultPageSet );
}
58
/**
 * Shared implementation behind execute() and executeGenerator().
 *
 * For every existing file among the requested File-namespace titles, looks
 * up all files sharing its SHA-1 hash and reports them: as sub-items of the
 * page in the API result, or (generator mode) as titles added to
 * $resultPageSet. Stops and sets a continuation value of the form
 * "image|duplicate" once the limit is exceeded or the result is full.
 *
 * @param object|null $resultPageSet Page set to populate in generator mode,
 *  or null to add the duplicates to the API result
 */
private function run( $resultPageSet = null ) {
	$params = $this->extractRequestParams();
	$namespaces = $this->getPageSet()->getGoodAndMissingTitlesByNamespace();
	if ( empty( $namespaces[NS_FILE] ) ) {
		return;
	}
	$images = $namespaces[NS_FILE];

	$descending = $params['dir'] === 'descending';
	if ( $descending ) {
		// Preserve keys: they are the file names, and PHP would renumber
		// any integer-like key otherwise.
		$images = array_reverse( $images, true );
	}

	// True when $name is ordered before $marker in the requested direction.
	// The continuation filters below must follow the iteration direction;
	// a fixed "<" test re-emits already returned rows when descending.
	$isBefore = static function ( $name, $marker ) use ( $descending ) {
		$cmp = strcmp( (string)$name, (string)$marker );
		return $descending ? $cmp > 0 : $cmp < 0;
	};

	$skipUntilThisDup = false;
	if ( isset( $params['continue'] ) ) {
		$cont = explode( '|', $params['continue'] );
		$this->dieContinueUsageIf( count( $cont ) != 2 );
		$fromImage = $cont[0];
		$skipUntilThisDup = $cont[1];
		// Drop the images that precede $fromImage in iteration order
		foreach ( $images as $image => $pageId ) {
			if ( !$isBefore( $image, $fromImage ) ) {
				break;
			}
			unset( $images[$image] );
		}
	}

	// Search either the local repository only or the whole repository group
	$repo = $params['localonly'] ? $this->repoGroup->getLocalRepo() : $this->repoGroup;

	$filesToFind = array_keys( $images );
	$files = $repo->findFiles( $filesToFind );

	$fit = true;
	$count = 0;
	$titles = [];

	$sha1s = [];
	foreach ( $files as $file ) {
		/** @var File $file */
		$sha1s[$file->getName()] = $file->getSha1();
	}

	// Find all files with these hashes; the result is used as:
	// [ hash => [ dup1, dup2 ], hash1 => ... ]
	$filesToFindBySha1s = array_unique( array_values( $sha1s ) );
	$filesBySha1s = $repo->findBySha1s( $filesToFindBySha1s );

	// Iterate over $images (not $files) so the continue parameter is handled correctly
	foreach ( $images as $image => $pageId ) {
		if ( !isset( $sha1s[$image] ) ) {
			continue; // file does not exist
		}
		$sha1 = $sha1s[$image];
		// Tolerate a hash for which the lookup returned no entry
		$dupFiles = isset( $filesBySha1s[$sha1] ) ? $filesBySha1s[$sha1] : [];
		if ( $descending ) {
			$dupFiles = array_reverse( $dupFiles );
		}
		foreach ( $dupFiles as $dupFile ) {
			/** @var File $dupFile */
			$dupName = $dupFile->getName();
			if ( (string)$image === (string)$dupName && $dupFile->isLocal() ) {
				continue; // ignore the local file itself
			}
			if ( $skipUntilThisDup !== false && $isBefore( $dupName, $skipUntilThisDup ) ) {
				continue; // skip to the position given by the continue parameter
			}
			$skipUntilThisDup = false;
			if ( ++$count > $params['limit'] ) {
				$fit = false; // break outer loop
				// We're one over the limit, which shows that there are
				// additional duplicates to be had. Stop here...
				$this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
				break;
			}
			if ( $resultPageSet !== null ) {
				$titles[] = $dupFile->getTitle();
			} else {
				$r = [
					'name' => $dupName,
					'timestamp' => wfTimestamp( TS_ISO_8601, $dupFile->getTimestamp() ),
					'shared' => !$dupFile->isLocal(),
				];
				$uploader = $dupFile->getUploader( File::FOR_PUBLIC );
				if ( $uploader ) {
					$r['user'] = $uploader->getName();
				}
				$fit = $this->addPageSubItem( $pageId, $r );
				if ( !$fit ) {
					// The result is full; resume from this duplicate next time
					$this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
					break;
				}
			}
		}
		if ( !$fit ) {
			break;
		}
	}
	if ( $resultPageSet !== null ) {
		$resultPageSet->populateFromTitles( $titles );
	}
}
170
/**
 * Describe the parameters this module accepts.
 *
 * @return array Map of parameter name to its default value or to an
 *  array of ApiBase::PARAM_* settings
 */
public function getAllowedParams() {
	return [
		'limit' => [
			// NOTE(review): the default (10) and minimum (1) are restored
			// from the upstream module definition — confirm against the
			// release this file belongs to.
			ApiBase::PARAM_DFLT => 10,
			ApiBase::PARAM_TYPE => 'limit',
			ApiBase::PARAM_MIN => 1,
			ApiBase::PARAM_MAX => ApiBase::LIMIT_BIG1,
			ApiBase::PARAM_MAX2 => ApiBase::LIMIT_BIG2
		],
		'continue' => [
			ApiBase::PARAM_HELP_MSG => 'api-help-param-continue',
		],
		'dir' => [
			ApiBase::PARAM_DFLT => 'ascending',
			// The allowed values must be nested under PARAM_TYPE; without
			// this key the array literal is unbalanced.
			ApiBase::PARAM_TYPE => [
				'ascending',
				'descending'
			]
		],
		'localonly' => false,
	];
}
193
/**
 * List usage examples for this module.
 *
 * @return array Map of example query string to the message key describing it
 */
protected function getExamplesMessages() {
	$examples = [];

	// Duplicates of a single named file
	$examples['action=query&titles=File:Albert_Einstein_Head.jpg&prop=duplicatefiles']
		= 'apihelp-query+duplicatefiles-example-simple';
	// Duplicates of every file produced by the allimages generator
	$examples['action=query&generator=allimages&prop=duplicatefiles']
		= 'apihelp-query+duplicatefiles-example-generated';

	return $examples;
}
202
/**
 * Point to the on-wiki help page for this module.
 *
 * @return string URL of the help page
 */
public function getHelpUrls() {
	$helpPage = 'https://www.mediawiki.org/wiki/Special:MyLanguage/API:Duplicatefiles';

	return $helpPage;
}
206}
const NS_FILE
Definition Defines.php:70
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
const PARAM_MAX2
Definition ApiBase.php:89
const PARAM_MAX
Definition ApiBase.php:85
dieContinueUsageIf( $condition)
Die with the 'badcontinue' error.
Definition ApiBase.php:1620
const PARAM_TYPE
Definition ApiBase.php:81
const PARAM_DFLT
Definition ApiBase.php:73
const PARAM_MIN
Definition ApiBase.php:93
const LIMIT_BIG1
Fast query, standard limit.
Definition ApiBase.php:220
extractRequestParams( $options=[])
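Using getAllowedParams(), this function makes an array of the values provided by the user, with the key being the name of the variable and the value being the validated value from the user or the default.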
Definition ApiBase.php:764
const PARAM_HELP_MSG
(string|array|Message) Specify an alternative i18n documentation message for this parameter.
Definition ApiBase.php:162
const LIMIT_BIG2
Fast query, apihighlimits limit.
Definition ApiBase.php:222
addPageSubItem( $pageId, $item, $elemname=null)
Same as addPageSubItems(), but one element of $data at a time.
A query module to list duplicates of the given file(s)
execute()
Evaluates the parameters, performs the requested query, and sets up the result.
executeGenerator( $resultPageSet)
Execute this module as a generator.
getAllowedParams()
Returns an array of allowed parameters (parameter name) => (default value) or (parameter name) => (array with PARAM_* constants as keys).
getCacheMode( $params)
Get the cache mode for the data generated by this module.
getExamplesMessages()
Returns usage examples for this module.
getHelpUrls()
Return links to more detailed help pages about the module.
__construct(ApiQuery $query, $moduleName, RepoGroup $repoGroup)
setContinueEnumParameter( $paramName, $paramValue)
Overridden to set the generator param if in generator mode.
getPageSet()
Get the PageSet object to work on.
This is the main query class.
Definition ApiQuery.php:37
Implements some public methods and some protected utility functions which are required by multiple child classes.
Definition File.php:66
getSha1()
Get the SHA-1 base 36 hash of the file.
Definition File.php:2335
Prioritized list of file repositories.
Definition RepoGroup.php:33
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition router.php:42