MediaWiki master
grep.php
Go to the documentation of this file.
1<?php
2// phpcs:disable MediaWiki.Files.ClassMatchesFilename.NotMatch
14
15// @codeCoverageIgnoreStart
16require_once __DIR__ . '/Maintenance.php';
17// @codeCoverageIgnoreEnd
18
/**
 * Search pages for a given regex.
 *
 * Maintenance script that reads the raw source text of pages (optionally
 * restricted to one or more title prefixes) and prints either every line
 * matching the regex or, with --pages-with-matches, only the titles of
 * pages that contain a match.
 */
class GrepPages extends Maintenance {
	/** @var Language|null Content language used to resolve namespace names; set by init() */
	private $contLang;

	/** @var WikiPageFactory|null Factory for the page objects yielded by findPages(); set by init() */
	private $wikiPageFactory;

	/**
	 * Register the script description, its options and the regex argument.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Search the source text of pages for lines matching ' .
			'a given regex, and print the lines.' );
		// withArg + multiOccurrence: --prefix takes a value and may be repeated.
		$this->addOption( 'prefix',
			'Title prefix. Can be specified more than once. ' .
			'Use e.g. --prefix=Talk: to search an entire namespace.',
			false, true, false, true );
		$this->addOption( 'show-wiki', 'Add the wiki ID to the output' );
		// Flag option with the short name -l.
		$this->addOption( 'pages-with-matches',
			'Suppress normal output; instead print the title of each page ' .
			'from which output would normally have been printed.',
			false, false, 'l' );
		$this->addArg( 'regex', 'The regex to search for' );
	}

	/**
	 * Fetch the services this script needs from the service container.
	 */
	private function init() {
		$services = $this->getServiceContainer();
		$this->contLang = $services->getContentLanguage();
		$this->wikiPageFactory = $services->getWikiPageFactory();
	}

	/**
	 * Run the search and print the results to standard output.
	 *
	 * Output format is "Title:lineNumber:line" per matching line, or just
	 * "Title" with --pages-with-matches; --show-wiki prepends the wiki ID
	 * and a tab. Pages without content or with a non-text content model are
	 * reported through error() and skipped.
	 */
	public function execute() {
		$this->init();

		$showWiki = $this->getOption( 'show-wiki' );
		$wikiId = WikiMap::getCurrentWikiId();
		$prefix = $this->getOption( 'prefix' );
		$regex = $this->getArg( 0 );
		$titleOnly = $this->hasOption( 'pages-with-matches' );

		// A regex starting with "/" is taken to be already delimited (and may
		// carry modifiers); anything else is wrapped in {} delimiters.
		if ( ( $regex[0] ?? '' ) === '/' ) {
			$delimRegex = $regex;
		} else {
			$delimRegex = '{' . $regex . '}';
		}

		// Validate the pattern once up front. preg_match() returns false only
		// on error, and an invalid pattern would otherwise look exactly like
		// "no matches" on every page.
		if ( preg_match( $delimRegex, '' ) === false ) {
			$this->fatalError( "Invalid regex: $regex" );
		}

		// Computed once instead of branching on --show-wiki at every echo.
		$outPrefix = $showWiki ? "$wikiId\t" : '';

		foreach ( $this->findPages( $prefix ) as $page ) {
			$content = $page->getContent( RevisionRecord::RAW );
			$titleText = $page->getTitle()->getPrefixedDBkey();
			if ( !$content ) {
				$this->error( "Page has no content: $titleText" );
				continue;
			}
			if ( !$content instanceof TextContent ) {
				$this->error( "Page has a non-text content model: $titleText" );
				continue;
			}

			$text = $content->getText();

			if ( $titleOnly ) {
				// Match against the whole text; one output line per page.
				if ( preg_match( $delimRegex, $text ) ) {
					echo "$outPrefix$titleText\n";
				}
				continue;
			}

			foreach ( StringUtils::explode( "\n", $text ) as $lineNum => $line ) {
				// Keys are zero-based; report one-based line numbers.
				$lineNum++;
				if ( preg_match( $delimRegex, $line ) ) {
					echo "$outPrefix$titleText:$lineNum:$line\n";
				}
			}
		}
	}

	/**
	 * Lazily iterate over the pages matching any of the given title prefixes.
	 *
	 * Pages are fetched in batches of 200, in ascending page ID order.
	 *
	 * @param string[]|null $prefixes Title prefixes, optionally starting with
	 *   "Namespace:". Null or an empty array selects every page.
	 * @return iterable Generator yielding one page object per matching row,
	 *   as created by the WikiPageFactory
	 */
	public function findPages( ?array $prefixes = null ): iterable {
		// This method is public, so do not rely on execute() having run first.
		if ( $this->contLang === null || $this->wikiPageFactory === null ) {
			$this->init();
		}

		$dbr = $this->getReplicaDB();
		$orConds = [];
		foreach ( $prefixes ?? [] as $prefix ) {
			// Users may type spaces; convert them to underscores before the
			// namespace lookup and the title comparison.
			// Assumes page_title holds DB keys (underscores, no spaces).
			$prefix = strtr( $prefix, ' ', '_' );

			$colonPos = strpos( $prefix, ':' );
			$ns = $colonPos === false
				? false
				: $this->contLang->getNsIndex( substr( $prefix, 0, $colonPos ) );

			if ( $ns !== false ) {
				$prefixDBkey = substr( $prefix, $colonPos + 1 );
			} else {
				// No colon, or the text before it is not a namespace name:
				// treat the whole string as a main-namespace title prefix
				// rather than dropping the part before the colon.
				$ns = NS_MAIN;
				$prefixDBkey = $prefix;
			}

			$prefixExpr = $dbr->expr( 'page_namespace', '=', $ns );
			// An empty title part (e.g. "Talk:") selects the whole namespace.
			if ( $prefixDBkey !== '' ) {
				$prefixExpr = $prefixExpr->and(
					'page_title',
					IExpression::LIKE,
					new LikeValue( $prefixDBkey, $dbr->anyString() )
				);
			}
			$orConds[] = $prefixExpr;
		}

		$lastId = 0;
		do {
			$res = $dbr->newSelectQueryBuilder()
				->queryInfo( WikiPage::getQueryInfo() )
				->where( $orConds ? $dbr->orExpr( $orConds ) : [] )
				->andWhere( $dbr->expr( 'page_id', '>', $lastId ) )
				// Paging by "page_id > $lastId" is only correct when each
				// batch is ordered by page_id; without it the last row of a
				// batch need not carry the highest ID, and pages could be
				// skipped or returned twice.
				->orderBy( 'page_id' )
				->limit( 200 )
				->caller( __METHOD__ )
				->fetchResultSet();
			foreach ( $res as $row ) {
				$title = Title::newFromRow( $row );
				yield $this->wikiPageFactory->newFromTitle( $title );
				$lastId = $row->page_id;
			}
		} while ( $res->numRows() );
	}
}
146
147// @codeCoverageIgnoreStart
148$maintClass = GrepPages::class;
149require_once RUN_MAINTENANCE_IF_MAIN;
150// @codeCoverageIgnoreEnd
const NS_MAIN
Definition Defines.php:65
Search pages for a given regex.
Definition grep.php:24
execute()
Do the actual work.
Definition grep.php:53
findPages(?array $prefixes=null)
Definition grep.php:105
__construct()
Default constructor.
Definition grep.php:31
Content object implementation for representing flat text.
Base class for language-specific code.
Definition Language.php:81
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
getArg( $argId=0, $default=null)
Get an argument.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
hasOption( $name)
Checks to see if a particular option was set.
getOption( $name, $default=null)
Get an option, or return the default.
error( $err, $die=0)
Throw an error to the user.
getServiceContainer()
Returns the main service container.
addDescription( $text)
Set the description text.
Service for creating WikiPage objects.
Base representation for an editable wiki page.
Definition WikiPage.php:94
Page revision base class.
Represents a title within MediaWiki.
Definition Title.php:78
Tools for dealing with other locally-hosted wikis.
Definition WikiMap.php:33
Content of like value.
Definition LikeValue.php:14
A collection of static methods to play with strings.
$maintClass
Definition grep.php:148