MediaWiki  master
CategoryFinder.php
Go to the documentation of this file.
1 <?php
24 
50  protected $articles = []; // Article IDs exactly as handed to seed(); run() filters this list
51 
53  protected $deadend = []; // Category DB keys for which scanNextLayer() found no page row (key => key)
54 
56  protected $parents = []; // Page ID => [ category DB key => categorylinks row ]
57 
59  protected $next = []; // Page IDs whose categories the next scanNextLayer() call will look up
60 
62  protected $maxdepth = -1; // Depth limit for run(); exactly -1 disables the limit
63 
65  protected $targets = []; // Wanted category DB keys (key => key), filled by seed()
66 
68  protected $name2id = []; // Category DB key => page ID of that category's page
69 
71  protected $mode; // 'OR' is special-cased in check(); any other value is handled as 'AND'
72 
74  protected $dbr; // Database handle obtained from wfGetDB( DB_REPLICA ) in run()
75 
/**
 * Initializes the instance for a new search.
 *
 * Stores the article IDs and search options, converts the requested
 * category names to DB-key form, and discards any parent-tree state left
 * behind by an earlier seed()/run() cycle on the same object.
 *
 * @param int[] $articleIds Page IDs of the articles to test
 * @param string[] $categories Category names; entries rejected by
 *   Title::makeTitleSafe() are dropped
 * @param string $mode 'OR' to accept any target category; every other
 *   value is handled as 'AND'
 * @param int $maxdepth Depth limit for the parent scan, -1 for no limit
 */
public function seed( $articleIds, $categories, $mode = 'AND', $maxdepth = -1 ) {
	$this->articles = $articleIds;
	$this->next = $articleIds;
	$this->mode = $mode;
	$this->maxdepth = $maxdepth;

	# Start from a clean tree. Categories recorded in $name2id are never
	# re-queued by scanNextLayer(), so after a depth-limited run their
	# parents would stay unscanned; conversely, a tree collected without a
	# limit would let check() match beyond a newly requested $maxdepth.
	$this->parents = [];
	$this->name2id = [];
	$this->deadend = [];

	# Set the list of target categories; convert them to DBKEY form first
	$this->targets = [];
	foreach ( $categories as $c ) {
		$ct = Title::makeTitleSafe( NS_CATEGORY, $c );
		if ( $ct ) {
			$c = $ct->getDBkey();
			$this->targets[$c] = $c;
		}
	}
}
103 
/**
 * Walks up the category tree from the seeded pages, then tests every
 * seeded article against the target categories.
 *
 * @return array The seeded article IDs for which check() succeeded, in
 *   their original order
 */
public function run() {
	$this->dbr = wfGetDB( DB_REPLICA );

	# Climb one layer per iteration until nothing new is queued
	$depth = 0;
	while ( count( $this->next ) ) {
		$this->scanNextLayer();

		if ( $this->maxdepth === -1 ) {
			# No depth limit requested
			continue;
		}
		if ( !( $depth < $this->maxdepth ) ) {
			# Limit reached; the layer just scanned was the last one
			break;
		}
		$depth++;
	}

	# Keep only the articles that satisfy the conditions
	$matches = [];
	foreach ( $this->articles as $articleId ) {
		# check() consumes its condition list, so each article gets a fresh copy
		$pending = $this->targets;
		if ( !$this->check( $articleId, $pending ) ) {
			continue;
		}
		$matches[] = $articleId;
	}

	return $matches;
}
136 
/**
 * Returns the parent tree collected so far.
 *
 * @return array Page ID => [ category DB key => categorylinks row ]
 */
public function getParents() {
	return $this->parents;
}
144 
/**
 * Recursively walks the collected parent tree from one page upwards,
 * ticking off target categories as they are encountered.
 *
 * @param int|string $id Page ID to start from
 * @param array &$conds Target categories still unmatched, keyed by DB key.
 *   Matched entries are removed; in 'OR' mode the whole list is emptied
 *   on the first hit.
 * @param array $path Page IDs already visited on the way here
 * @return bool True once the conditions are satisfied
 */
private function check( $id, &$conds, $path = [] ) {
	# Seeing the same page twice on one path means the categories form a loop.
	# The comparison is deliberately loose: IDs show up both as ints and as
	# strings read from the database.
	if ( in_array( $id, $path ) ) {
		return false;
	}

	# Nothing left to match (runtime paranoia)
	if ( !count( $conds ) ) {
		return true;
	}

	# No known parents, so nothing can match from here
	if ( !isset( $this->parents[$id] ) ) {
		return false;
	}

	$path[] = $id;

	foreach ( $this->parents[$id] as $link ) {
		$category = $link->cl_to;

		if ( isset( $conds[$category] ) ) {
			# This parent is one of the wanted categories
			if ( $this->mode == 'OR' ) {
				# A single hit is sufficient
				$conds = [];
				return true;
			}

			# Anything else is treated as "AND": strike it off the list
			unset( $conds[$category] );
			if ( !count( $conds ) ) {
				return true;
			}
		}

		# Continue upwards only if the category has a page of its own
		if ( isset( $this->name2id[$category] ) ) {
			$satisfied = $this->check( $this->name2id[$category], $conds, $path );
			if ( $satisfied || !count( $conds ) ) {
				return true;
			}
		}
	}

	return false;
}
203 
/**
 * Scans one "parent layer": records the categories of every page in
 * $this->next, then replaces $this->next with the page IDs of the newly
 * discovered categories. Categories without a page are remembered as
 * dead ends so they are not looked up again.
 */
private function scanNextLayer() {
	# Find all parents of the pages currently in $this->next
	$layer = [];
	$res = $this->dbr->select(
		/* FROM */ 'categorylinks',
		/* SELECT */ [ 'cl_to', 'cl_from' ],
		/* WHERE */ [ 'cl_from' => $this->next ],
		__METHOD__ . '-1'
	);
	foreach ( $res as $row ) {
		$k = $row->cl_to;

		# Update parent tree
		$this->parents[$row->cl_from][$k] = $row;

		# Ignore those we already have. Both lists are keyed by DB key, so
		# isset() gives an exact match; a loose in_array() would treat
		# numeric-looking names such as "1e3" and "1000" as the same category.
		if ( isset( $this->deadend[$k] ) || isset( $this->name2id[$k] ) ) {
			continue;
		}

		# Hey, new category!
		$layer[$k] = $k;
	}

	$this->next = [];

	# Find the IDs of all category pages in $layer, if they exist
	if ( count( $layer ) > 0 ) {
		$res = $this->dbr->select(
			/* FROM */ 'page',
			/* SELECT */ [ 'page_id', 'page_title' ],
			/* WHERE */ [ 'page_namespace' => NS_CATEGORY, 'page_title' => $layer ],
			__METHOD__ . '-2'
		);
		foreach ( $res as $row ) {
			$id = $row->page_id;
			$name = $row->page_title;
			$this->name2id[$name] = $id;
			$this->next[] = $id;
			# Whatever is left in $layer afterwards has no page
			unset( $layer[$name] );
		}
	}

	# Mark dead ends
	foreach ( $layer as $v ) {
		$this->deadend[$v] = $v;
	}
}
262 }
The "CategoryFinder" class takes a list of articles, creates an internal representation of all their ...
IDatabase $dbr
Read-DB replica DB.
array $deadend
Array of DBKEY category names for categories that don't have a page.
array $parents
Array of [ ID => [] ].
int [] $articles
The original article IDs passed to the seed function.
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
run()
Iterates through the parent tree starting with the seed values, then checks the articles if they matc...
array $targets
Array of DBKEY category names.
getParents()
Get the parents.
const NS_CATEGORY
Definition: Defines.php:74
scanNextLayer()
Scans a "parent layer" of the articles/categories in $this->next.
array $next
Array of article/category IDs.
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:613
seed( $articleIds, $categories, $mode='AND', $maxdepth=-1)
Initializes the instance.
int $maxdepth
Max layer depth.
const DB_REPLICA
Definition: defines.php:25
string $mode
"AND" or "OR"
check( $id, &$conds, $path=[])
This function recurses through the parent representation, trying to match the conditions.