MediaWiki master
importTextFiles.php
Go to the documentation of this file.
1<?php
15use Wikimedia\Timestamp\TimestampFormat as TS;
16
17// @codeCoverageIgnoreStart
18require_once __DIR__ . '/Maintenance.php';
19// @codeCoverageIgnoreEnd
20
/**
 * Register the script description, its command-line options and the
 * positional "files" argument.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Reads in text files and imports their content to pages of the wiki' );

	// Each entry is the positional argument list handed to addOption():
	// name, description[, required, withArg, shortName]. Flag-style options
	// only give the first two and rely on addOption()'s own defaults.
	$optionSpecs = [
		[
			'user',
			'Username to which edits should be attributed. ' .
				'Default: "Maintenance script"',
			false,
			true,
			'u',
		],
		[ 'summary', 'Specify edit summary for the edits', false, true, 's' ],
		[
			'use-timestamp',
			'Use the modification date of the text file ' .
				'as the timestamp for the edit',
		],
		[
			'overwrite',
			'Overwrite existing pages. If --use-timestamp is passed, this ' .
				'will only overwrite pages if the file has been modified since the page was last modified.',
		],
		[ 'prefix', 'A string to place in front of the file name', false, true, 'p' ],
		[ 'bot', 'Mark edits as bot edits in the recent changes list.' ],
		[ 'rc', 'Place revisions in RecentChanges.' ],
	];

	// Registration order is kept identical to the declaration order above.
	foreach ( $optionSpecs as $optionSpec ) {
		$this->addOption( ...$optionSpec );
	}

	$this->addArg( 'files', 'Files to import' );
}
43
/**
 * Import every text file named on the command line as a wiki page.
 *
 * The page title is the --prefix value followed by the file name without its
 * extension. For each file:
 *  - an invalid title (or one containing a fragment) is skipped;
 *  - an existing page is skipped unless --overwrite is set;
 *  - with --overwrite and --use-timestamp, the page is skipped when it was
 *    touched at or after the file's modification time;
 *  - with --overwrite, the page is skipped when the file content equals the
 *    content of the current revision.
 * With --rc, a RecentChanges entry is inserted after each successful import.
 *
 * Terminates through fatalError() when an argument matches no file, a file
 * cannot be read, the user name is invalid, or at least one import failed.
 */
public function execute() {
	$userName = $this->getOption( 'user', false );
	$summary = $this->getOption( 'summary', 'Imported from text file' );
	$useTimestamp = $this->hasOption( 'use-timestamp' );
	$rc = $this->hasOption( 'rc' );
	$bot = $this->hasOption( 'bot' );
	$overwrite = $this->hasOption( 'overwrite' );
	$prefix = $this->getOption( 'prefix', '' );

	// Get all the arguments. A loop is required since Maintenance doesn't
	// support an arbitrary number of arguments.
	$files = [];
	$i = 0;
	// Compare against null (getArg()'s default for a missing argument) rather
	// than relying on truthiness, so that a file literally named "0" does not
	// end the loop early.
	// phpcs:ignore Generic.CodeAnalysis.AssignmentInCondition.FoundInWhileCondition
	while ( ( $arg = $this->getArg( $i++ ) ) !== null ) {
		if ( file_exists( $arg ) ) {
			$paths = [ $arg ];
		} else {
			// use glob to support the Windows shell, which doesn't automatically
			// expand wildcards. glob() may return false on error, which is
			// treated the same as "no match".
			$paths = glob( $arg ) ?: [];
			if ( !$paths ) {
				$this->fatalError( "Fatal error: The file '$arg' does not exist!" );
			}
		}

		foreach ( $paths as $path ) {
			$contents = file_get_contents( $path );
			if ( $contents === false ) {
				// Do not silently import a failed read as page text.
				$this->fatalError( "Fatal error: The file '$path' could not be read!" );
			}
			$files[$path] = $contents;
		}
	}

	$count = count( $files );
	$this->output( "Importing $count pages...\n" );

	if ( $userName === false ) {
		$user = User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
	} else {
		$user = User::newFromName( $userName );
	}

	if ( !$user ) {
		$this->fatalError( "Invalid username\n" );
	}
	if ( $user->isAnon() ) {
		$user->addToDatabase();
	}

	$exit = 0;

	$successCount = 0;
	$failCount = 0;
	$skipCount = 0;

	$revLookup = $this->getServiceContainer()->getRevisionLookup();
	$recentChangeFactory = $this->getServiceContainer()->getRecentChangeFactory();

	foreach ( $files as $file => $text ) {
		$pageName = $prefix . pathinfo( $file, PATHINFO_FILENAME );
		$timestamp = $useTimestamp ? wfTimestamp( TS::UNIX, filemtime( $file ) ) : wfTimestampNow();

		$title = Title::newFromText( $pageName );
		// Have to check for # manually, since it gets interpreted as a fragment
		if ( !$title || $title->hasFragment() ) {
			$this->error( "Invalid title $pageName. Skipping.\n" );
			$skipCount++;
			continue;
		}

		$exists = $title->exists();
		$oldRevID = $title->getLatestRevID();
		$oldRevRecord = $oldRevID ? $revLookup->getRevisionById( $oldRevID ) : null;
		$actualTitle = $title->getPrefixedText();

		if ( $exists ) {
			$touched = wfTimestamp( TS::UNIX, $title->getTouched() );
			if ( !$overwrite ) {
				$this->output( "Title $actualTitle already exists. Skipping.\n" );
				$skipCount++;
				continue;
			} elseif ( $useTimestamp && intval( $touched ) >= intval( $timestamp ) ) {
				$this->output( "File for title $actualTitle has not been modified since the " .
					"destination page was touched. Skipping.\n" );
				$skipCount++;
				continue;
			}
		}

		$content = ContentHandler::makeContent( rtrim( $text ), $title );
		$rev = new WikiRevision();
		$rev->setContent( SlotRecord::MAIN, $content );
		$rev->setTitle( $title );
		$rev->setUserObj( $user );
		$rev->setComment( $summary );
		$rev->setTimestamp( $timestamp );

		// The old revision record may be missing even though the page exists
		// (the RecentChanges branch below already allows for that), so only
		// compare content when there is a record to compare against.
		if ( $exists &&
			$overwrite &&
			$oldRevRecord &&
			$rev->getContent()->equals( $oldRevRecord->getContent( SlotRecord::MAIN ) )
		) {
			$this->output( "File for title $actualTitle contains no changes from the current " .
				"revision. Skipping.\n" );
			$skipCount++;
			continue;
		}

		$status = $rev->importOldRevision();
		$newId = $title->getLatestRevID();

		if ( $status ) {
			$action = $exists ? 'updated' : 'created';
			$this->output( "Successfully $action $actualTitle\n" );
			$successCount++;
		} else {
			$action = $exists ? 'update' : 'create';
			$this->output( "Failed to $action $actualTitle\n" );
			$failCount++;
			$exit = 1;
		}

		// Create the RecentChanges entry if necessary
		if ( $rc && $status ) {
			if ( $exists ) {
				if ( is_object( $oldRevRecord ) ) {
					$recentChange = $recentChangeFactory->createEditRecentChange(
						$timestamp,
						$title,
						$rev->getMinor(),
						$user,
						$summary,
						$oldRevID,
						$bot,
						'',
						$oldRevRecord->getSize(),
						$rev->getSize(),
						$newId,
						// the pages don't need to be patrolled
						1
					);

					$recentChangeFactory->insertRecentChange( $recentChange );
				}
			} else {
				$recentChange = $recentChangeFactory->createNewPageRecentChange(
					$timestamp,
					$title,
					$rev->getMinor(),
					$user,
					$summary,
					$bot,
					'',
					$rev->getSize(),
					$newId,
					1
				);

				$recentChangeFactory->insertRecentChange( $recentChange );
			}
		}
	}

	$this->output( "Done! $successCount succeeded, $skipCount skipped.\n" );
	if ( $exit ) {
		$this->fatalError( "Import failed with $failCount failed pages.\n", $exit );
	}
}
209}
210
211// @codeCoverageIgnoreStart
212$maintClass = ImportTextFiles::class;
213require_once RUN_MAINTENANCE_IF_MAIN;
214// @codeCoverageIgnoreEnd
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
wfTimestamp( $outputtype=TS::UNIX, $ts=0)
Get a timestamp string in one of various formats.
Maintenance script which reads in text files and imports their content to a page of the wiki.
execute()
Do the actual work.
__construct()
Default constructor.
Base class for content handling.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
getArg( $argId=0, $default=null)
Get an argument.
output( $out, $channel=null)
Throw some output to the user.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
hasOption( $name)
Checks to see if a particular option was set.
getOption( $name, $default=null)
Get an option, or return the default.
error( $err, $die=0)
Throw an error to the user.
getServiceContainer()
Returns the main service container.
addDescription( $text)
Set the description text.
Value object representing a content slot associated with a page revision.
Represents a title within MediaWiki.
Definition Title.php:69
User class for the MediaWiki software.
Definition User.php:130