MediaWiki  master
ApiQueryDuplicateFiles.php
Go to the documentation of this file.
1 <?php
25 
32 
/** @var RepoGroup Repository group queried for files and for files sharing a SHA-1 */
private $repoGroup;

/**
 * Build the module and remember the repository group it will query.
 *
 * The literal 'df' is handed to the parent constructor as its third
 * argument (presumably the parameter prefix -- confirm against the parent).
 *
 * @param ApiQuery $query
 * @param string $moduleName
 * @param RepoGroup $repoGroup
 */
public function __construct( ApiQuery $query, $moduleName, RepoGroup $repoGroup ) {
	parent::__construct( $query, $moduleName, 'df' );

	$this->repoGroup = $repoGroup;
}
49 
/**
 * Run the module as a plain property query: no result page set is supplied,
 * so run() attaches the duplicates to the pages of the current page set.
 */
public function execute() {
	// Same as calling run() with its default argument.
	$this->run( null );
}
53 
/**
 * Report the cache mode for this module's output.
 *
 * @param array $params Request parameters; not consulted here
 * @return string Always 'public'
 */
public function getCacheMode( $params ) {
	return 'public';
}
57 
/**
 * Run the module as a generator: the duplicates found by run() are
 * collected as titles and used to populate $resultPageSet.
 *
 * @param object $resultPageSet Page set that receives the duplicate titles
 */
public function executeGenerator( $resultPageSet ) {
	$this->run( $resultPageSet );
}
61 
/**
 * Find the duplicates (files sharing a SHA-1) of every file in the page set
 * and emit them, honouring 'limit', 'dir', 'localonly' and 'continue'.
 *
 * In normal mode each duplicate is added as a sub-item of the page it
 * duplicates; in generator mode the duplicates' titles are collected and
 * handed to $resultPageSet.
 *
 * The continue value has the form "image|dupName": the image being
 * processed and the first duplicate that was NOT emitted yet.
 *
 * @param object|null $resultPageSet Page set to populate via
 *  populateFromTitles() in generator mode, or null for normal output
 */
private function run( $resultPageSet = null ) {
	$params = $this->extractRequestParams();
	$namespaces = $this->getPageSet()->getGoodAndMissingTitlesByNamespace();
	if ( empty( $namespaces[NS_FILE] ) ) {
		return;
	}
	$images = $namespaces[NS_FILE];

	$descending = $params['dir'] === 'descending';
	if ( $descending ) {
		// Preserve keys: the keys are the file names, and without this flag
		// PHP renumbers integer-like keys (e.g. a title "123") when reversing.
		$images = array_reverse( $images, true );
	}
	// Multiplying strcmp() by this makes "< 0" mean "comes earlier in the
	// current iteration order" for both directions.
	$orderSign = $descending ? -1 : 1;

	$fromImage = null;
	$skipUntilThisDup = false;
	if ( isset( $params['continue'] ) ) {
		$cont = $this->parseContinueParamOrDie( $params['continue'], [ 'string', 'string' ] );
		$fromImage = $cont[0];
		$skipUntilThisDup = $cont[1];
		// Drop every image that precedes $fromImage in iteration order.
		// Byte-wise strcmp() is used so that numeric-looking names are not
		// compared as numbers.
		foreach ( $images as $image => $pageId ) {
			if ( $orderSign * strcmp( (string)$image, $fromImage ) < 0 ) {
				unset( $images[$image] );
			} else {
				break;
			}
		}
	}

	// Either the local repo only, or the whole repo group; both are used
	// through the same two lookups below.
	$repo = $params['localonly'] ? $this->repoGroup->getLocalRepo() : $this->repoGroup;

	$files = $repo->findFiles( array_keys( $images ) );

	// Map file name => SHA-1 for every file that actually exists
	$sha1s = [];
	foreach ( $files as $file ) {
		$sha1s[$file->getName()] = $file->getSha1();
	}

	// find all files with the hashes, result format is:
	// [ hash => [ dup1, dup2 ], hash1 => ... ]
	$filesBySha1s = $repo->findBySha1s( array_unique( array_values( $sha1s ) ) );

	$fit = true;
	$count = 0;
	$titles = [];

	// iterate over $images (not $files) so the continue position is honoured
	foreach ( $images as $image => $pageId ) {
		$imageName = (string)$image;

		// The duplicate marker belongs to the image named in the continue
		// value only; it must never suppress duplicates of later images.
		$skipDup = ( $fromImage !== null && $imageName === $fromImage )
			? $skipUntilThisDup
			: false;

		if ( !isset( $sha1s[$image] ) ) {
			continue; // file does not exist
		}
		$sha1 = $sha1s[$image];
		if ( !isset( $filesBySha1s[$sha1] ) ) {
			continue; // no bucket for this hash; nothing to report
		}
		$dupFiles = $filesBySha1s[$sha1];
		if ( $descending ) {
			$dupFiles = array_reverse( $dupFiles );
		}

		foreach ( $dupFiles as $dupFile ) {
			$dupName = $dupFile->getName();
			if ( $imageName === $dupName && $dupFile->isLocal() ) {
				continue; // ignore the local file itself
			}
			if ( $skipDup !== false && $orderSign * strcmp( $dupName, $skipDup ) < 0 ) {
				continue; // still before the position given by the continue param
			}
			$skipDup = false;

			if ( ++$count > $params['limit'] ) {
				$fit = false; // break outer loop
				// We're one over limit which shows that
				// there are additional images to be had. Stop here...
				$this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
				break;
			}

			if ( $resultPageSet !== null ) {
				$titles[] = $dupFile->getTitle();
			} else {
				$r = [
					'name' => $dupName,
					'timestamp' => wfTimestamp( TS_ISO_8601, $dupFile->getTimestamp() ),
					'shared' => !$dupFile->isLocal(),
				];
				$uploader = $dupFile->getUploader( File::FOR_PUBLIC );
				if ( $uploader ) {
					$r['user'] = $uploader->getName();
				}
				$fit = $this->addPageSubItem( $pageId, $r );
				if ( !$fit ) {
					// Result is full: resume from this very duplicate
					$this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
					break;
				}
			}
		}

		if ( !$fit ) {
			break;
		}
	}

	if ( $resultPageSet !== null ) {
		$resultPageSet->populateFromTitles( $titles );
	}
}
172 
/**
 * Describe the parameters this module accepts: 'limit', 'continue',
 * 'dir' and 'localonly'.
 *
 * @return array Parameter name => default value or settings array
 */
public function getAllowedParams() {
	// Result limit: defaults to 10, bounded by the ApiBase "big" limits
	$limitSettings = [
		ParamValidator::PARAM_DEFAULT => 10,
		ParamValidator::PARAM_TYPE => 'limit',
		IntegerDef::PARAM_MIN => 1,
		IntegerDef::PARAM_MAX => ApiBase::LIMIT_BIG1,
		IntegerDef::PARAM_MAX2 => ApiBase::LIMIT_BIG2
	];

	// Continuation token; only its help message is customised
	$continueSettings = [
		ApiBase::PARAM_HELP_MSG => 'api-help-param-continue',
	];

	// Listing direction: one of the two enumerated values
	$dirSettings = [
		ParamValidator::PARAM_DEFAULT => 'ascending',
		ParamValidator::PARAM_TYPE => [
			'ascending',
			'descending'
		]
	];

	return [
		'limit' => $limitSettings,
		'continue' => $continueSettings,
		'dir' => $dirSettings,
		'localonly' => false,
	];
}
195 
/**
 * Usage examples for this module.
 *
 * @return array Example query string => message key describing it
 */
protected function getExamplesMessages() {
	$examples = [];

	// Duplicates of one explicitly named file
	$examples['action=query&titles=File:Albert_Einstein_Head.jpg&prop=duplicatefiles']
		= 'apihelp-query+duplicatefiles-example-simple';

	// Duplicates of the files produced by the allimages generator
	$examples['action=query&generator=allimages&prop=duplicatefiles']
		= 'apihelp-query+duplicatefiles-example-generated';

	return $examples;
}
204 
/**
 * Link to the on-wiki documentation of this module.
 *
 * @return string URL of the help page
 */
public function getHelpUrls() {
	$helpUrl = 'https://www.mediawiki.org/wiki/Special:MyLanguage/API:Duplicatefiles';

	return $helpUrl;
}
208 }
const NS_FILE
Definition: Defines.php:70
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
parseContinueParamOrDie(string $continue, array $types)
Parse the 'continue' parameter in the usual format and validate the types of each part,...
Definition: ApiBase.php:1649
const LIMIT_BIG1
Fast query, standard limit.
Definition: ApiBase.php:229
extractRequestParams( $options=[])
Using getAllowedParams(), this function makes an array of the values provided by the user,...
Definition: ApiBase.php:773
const PARAM_HELP_MSG
(string|array|Message) Specify an alternative i18n documentation message for this parameter.
Definition: ApiBase.php:166
const LIMIT_BIG2
Fast query, apihighlimits limit.
Definition: ApiBase.php:231
addPageSubItem( $pageId, $item, $elemname=null)
Same as addPageSubItems(), but one element of $data at a time.
A query module to list duplicates of the given file(s)
execute()
Evaluates the parameters, performs the requested query, and sets up the result.
executeGenerator( $resultPageSet)
Execute this module as a generator.
getAllowedParams()
Returns an array of allowed parameters (parameter name) => (default value) or (parameter name) => (ar...
getCacheMode( $params)
Get the cache mode for the data generated by this module.
getExamplesMessages()
Returns usage examples for this module.
getHelpUrls()
Return links to more detailed help pages about the module.
__construct(ApiQuery $query, $moduleName, RepoGroup $repoGroup)
setContinueEnumParameter( $paramName, $paramValue)
Overridden to set the generator param if in generator mode.
getPageSet()
Get the PageSet object to work on.
This is the main query class.
Definition: ApiQuery.php:42
const FOR_PUBLIC
Definition: File.php:88
Prioritized list of file repositories.
Definition: RepoGroup.php:30
Service for formatting and validating API parameters.
Type definition for integer types.
Definition: IntegerDef.php:23
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42