MediaWiki  master
grep.php
Go to the documentation of this file.
1 <?php
2 // phpcs:disable MediaWiki.Files.ClassMatchesFilename.NotMatch
6 
7 require_once __DIR__ . '/Maintenance.php';
8 
/**
 * Maintenance script that searches the source text of pages for lines
 * matching a given regex, and prints the matching lines (or, with
 * --pages-with-matches, only the titles of pages that contain a match).
 */
class GrepPages extends Maintenance {
	/** @var Language Content language, used to resolve namespace names in --prefix values */
	private $contLang;

	/** @var WikiPageFactory Creates the page object for each title found by findPages() */
	private $wikiPageFactory;

	/**
	 * Declare the script description, its options and the regex argument.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Search the source text of pages for lines matching ' .
			'a given regex, and print the lines.' );
		// Optional, takes a value, no short name, may be given multiple times.
		$this->addOption( 'prefix',
			'Title prefix. Can be specified more than once. ' .
			'Use e.g. --prefix=Talk: to search an entire namespace.',
			false, true, false, true );
		$this->addOption( 'show-wiki', 'Add the wiki ID to the output' );
		// Optional flag without a value; short form is -l.
		$this->addOption( 'pages-with-matches',
			'Suppress normal output; instead print the title of each page ' .
			'from which output would normally have been printed.',
			false, false, 'l' );
		$this->addArg( 'regex', 'The regex to search for' );
	}

	/**
	 * Fetch the services this script needs from the service container.
	 */
	private function init() {
		$services = MediaWikiServices::getInstance();
		$this->contLang = $services->getContentLanguage();
		$this->wikiPageFactory = $services->getWikiPageFactory();
	}

	/**
	 * Run the search: iterate over the selected pages and print every match.
	 *
	 * Output format is "title:lineNumber:line" per matching line, or just
	 * "title" with --pages-with-matches. With --show-wiki each output line is
	 * prefixed with the wiki ID and a tab. Pages with no content or with a
	 * non-text content model are reported through error() and skipped.
	 */
	public function execute() {
		$this->init();

		$showWiki = $this->getOption( 'show-wiki' );
		$wikiId = WikiMap::getCurrentWikiId();
		$prefix = $this->getOption( 'prefix' );
		$regex = $this->getArg( 0 );
		$titleOnly = $this->hasOption( 'pages-with-matches' );

		// A pattern that starts with "/" is taken to be delimited already and
		// must be passed through in full (not just its first character);
		// anything else is wrapped in {} delimiters.
		if ( ( $regex[0] ?? '' ) === '/' ) {
			$delimRegex = $regex;
		} else {
			$delimRegex = '{' . $regex . '}';
		}

		// Computed once: every output line starts with this when --show-wiki is set.
		$outputPrefix = $showWiki ? "$wikiId\t" : '';

		foreach ( $this->findPages( $prefix ) as $page ) {
			$content = $page->getContent( RevisionRecord::RAW );
			$titleText = $page->getTitle()->getPrefixedDBkey();
			if ( !$content ) {
				$this->error( "Page has no content: $titleText" );
				continue;
			}
			if ( !$content instanceof TextContent ) {
				$this->error( "Page has a non-text content model: $titleText" );
				continue;
			}

			$text = $content->getText();

			if ( $titleOnly ) {
				// Match against the whole text; one output line per page.
				if ( preg_match( $delimRegex, $text ) ) {
					echo "$outputPrefix$titleText\n";
				}
				continue;
			}

			// Match line by line; StringUtils::explode() keeps memory usage low.
			foreach ( StringUtils::explode( "\n", $text ) as $index => $line ) {
				if ( preg_match( $delimRegex, $line ) ) {
					// The iteration key is one less than the line number shown to the user.
					$lineNum = $index + 1;
					echo "$outputPrefix$titleText:$lineNum:$line\n";
				}
			}
		}
	}

	/**
	 * Find the pages to search and yield a page object for each of them.
	 *
	 * Each prefix is split at its first colon: the part before it is looked
	 * up as a namespace name, the part after it is the title prefix within
	 * that namespace. A prefix without a colon applies to the main namespace.
	 * An empty title prefix (e.g. "Talk:") selects the entire namespace.
	 *
	 * @param string[]|null $prefixes Title prefixes to restrict the search to,
	 *   or null to select every page.
	 * @return Generator Yields the objects returned by
	 *   WikiPageFactory::newFromTitle(), one per matching page row.
	 */
	public function findPages( $prefixes = null ) {
		$dbr = $this->getDB( DB_REPLICA );
		$orConds = [];
		if ( $prefixes !== null ) {
			foreach ( $prefixes as $prefix ) {
				$colonPos = strpos( $prefix, ':' );
				if ( $colonPos !== false ) {
					// NOTE(review): the getNsIndex() result is used as-is; confirm what
					// it returns for an unrecognized namespace name and whether such a
					// prefix should instead be treated as a main-namespace title.
					$ns = $this->contLang->getNsIndex( substr( $prefix, 0, $colonPos ) );
					$prefixDBkey = substr( $prefix, $colonPos + 1 );
				} else {
					$ns = NS_MAIN;
					$prefixDBkey = $prefix;
				}
				$prefixCond = [ 'page_namespace' => $ns ];
				if ( $prefixDBkey !== '' ) {
					$prefixCond[] = 'page_title ' . $dbr->buildLike( $prefixDBkey, $dbr->anyString() );
				}
				$orConds[] = $dbr->makeList( $prefixCond, LIST_AND );
			}
		}

		// No prefixes means no restriction; otherwise a page matching any prefix qualifies.
		$conds = $orConds ? $dbr->makeList( $orConds, LIST_OR ) : [];
		$pageQuery = WikiPage::getQueryInfo();

		$res = $dbr->newSelectQueryBuilder()
			->queryInfo( $pageQuery )
			->where( $conds )
			->caller( __METHOD__ )
			->fetchResultSet();
		foreach ( $res as $row ) {
			$title = Title::newFromRow( $row );
			yield $this->wikiPageFactory->newFromTitle( $title );
		}
	}
}
130 
// Name the class that implements this maintenance script, then include the
// file referenced by RUN_MAINTENANCE_IF_MAIN.
// NOTE(review): RUN_MAINTENANCE_IF_MAIN is defined outside this file; presumably
// it runs $maintClass only when this script is the entry point — confirm.
131 $maintClass = GrepPages::class;
132 require_once RUN_MAINTENANCE_IF_MAIN;
const NS_MAIN
Definition: Defines.php:64
const LIST_OR
Definition: Defines.php:46
const LIST_AND
Definition: Defines.php:43
Search pages for a given regex.
Definition: grep.php:14
execute()
Do the actual work.
Definition: grep.php:43
findPages( $prefixes=null)
Definition: grep.php:95
__construct()
Default constructor.
Definition: grep.php:21
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
Definition: Maintenance.php:66
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
error( $err, $die=0)
Throw an error to the user.
addArg( $arg, $description, $required=true)
Add some args that are needed.
hasOption( $name)
Checks to see if a particular option was set.
getArg( $argId=0, $default=null)
Get an argument.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
Service locator for MediaWiki core services.
Service for creating WikiPage objects.
Page revision base class.
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
Content object implementation for representing flat text.
Definition: TextContent.php:40
static newFromRow( $row)
Make a Title object from a DB row.
Definition: Title.php:573
static getCurrentWikiId()
Definition: WikiMap.php:303
static getQueryInfo()
Return the tables, fields, and join conditions to be selected to create a new page object.
Definition: WikiPage.php:359
$maintClass
Definition: grep.php:131
$line
Definition: mcc.php:119
const DB_REPLICA
Definition: defines.php:26
$content
Definition: router.php:76