// Version component of every cache key this class builds (it is passed to makeKey()
// together with the 'textextracts' prefix), so a different value yields different keys.
29 const CACHE_VERSION = 2;
// Content models that extraction accepts. A title's content model is tested
// against this list with a strict in_array() before any extract is produced.
44 private $supportedContentModels = [
'wikitext' ];
// NOTE(review): constructor body fragment; the method header is not visible here.
// Register with the parent under the given module name and this class's PREFIX constant.
52 parent::__construct( $query, $moduleName, self::PREFIX );
// Keep the supplied configuration object; it is read later via $this->config->get().
53 $this->config = $conf;
// NOTE(review): fragment of the main request handler; the method header and several
// intermediate lines are not visible here, so the comments describe only what is shown.
// Work on the good titles of the requested page set.
62 $titles = $this->getPageSet()->getGoodTitles();
// An empty title list is handled separately (branch body not visible here).
63 if ( $titles === [] ) {
// Treat the response as XML for internal calls or when the printer reports format 'XML'.
// NOTE(review): this is a loose == comparison; === would be stricter.
66 $isXml = $this->getMain()->isInternalMode()
67 || $this->getMain()->getPrinter()->getFormat() ==
'XML';
68 $result = $this->getResult();
// Read the request parameters once and keep a copy on the instance for the helper methods.
69 $params = $this->params = $this->extractRequestParams();
// 'chars' and 'sentences' may not both be supplied.
70 $this->requireMaxOneParameter( $params,
'chars',
'sentences' );
72 $limit = intval( $params[
'limit'] );
// A limit above 1 without 'intro' leads to the limit warning below
// (the statement between the condition and the warning is not visible here).
73 if ( $limit > 1 && !$params[
'intro'] ) {
75 $this->addWarning( [
'apiwarn-textextracts-limit', $limit ] );
// Resuming a previous request: validate the offset against the number of titles,
// then drop the titles before it while preserving the array keys.
77 if ( isset( $params[
'continue'] ) ) {
78 $continue = intval( $params[
'continue'] );
79 $this->dieContinueUsageIf( $continue < 0 || $continue > count( $titles ) );
80 $titles = array_slice( $titles, $continue,
null,
true );
// Flag checked after the loop to decide whether the file-namespace warning is emitted.
83 $titleInFileNamespace =
false;
85 foreach ( $titles as $id =>
$t ) {
// Once more than $limit titles have been visited, record where the client should continue.
86 if ( ++$count > $limit ) {
87 $this->setContinueEnumParameter(
'continue', $continue + $count - 1 );
// Condition that sets the flag is not visible here.
93 $titleInFileNamespace =
true;
95 $params = $this->params;
// Fetch the extract for this title, then shorten it according to the request.
96 $text = $this->getExtract(
$t );
97 $text = $this->truncate( $text );
// Plain-text output gets its section headings formatted.
98 if ( $params[
'plaintext'] ) {
99 $text = $this->doSections( $text );
// Warning raised when 'sentences' is set; presumably in the non-plaintext branch,
// whose 'else' line is not visible here. TODO confirm.
101 if ( $params[
'sentences'] ) {
102 $this->addWarning( $this->msg(
'apiwarn-textextracts-sentences-and-html', self::PREFIX ) );
// Malformed-HTML warning (its guarding condition is not visible here).
104 $this->addWarning(
'apiwarn-textextracts-malformed-html' );
// Store the extract under query -> pages -> <id> -> extract: wrapped as [ '*' => text ]
// in one branch and as a bare string in the other. The branch condition is not visible;
// presumably $isXml selects between them. TODO confirm.
109 $fit = $result->addValue( [
'query',
'pages', $id ],
'extract', [
'*' => $text ] );
111 $fit = $result->addValue( [
'query',
'pages', $id ],
'extract', $text );
// Record the continuation offset (condition not visible; presumably when $fit is false).
114 $this->setContinueEnumParameter(
'continue', $continue + $count - 1 );
// Emit a single warning after the loop if the flag was set.
118 if ( $titleInFileNamespace ) {
119 $this->addWarning(
'apiwarn-textextracts-title-in-file-namespace' );
// NOTE(review): fragment; the method header and some intermediate lines are not visible here.
// Only titles whose content model is listed in $supportedContentModels pass this check.
137 $contentModel =
$title->getContentModel();
138 if ( !in_array( $contentModel, $this->supportedContentModels,
true ) ) {
// Message key used for unsupported content models (the surrounding call is not visible here).
140 'apiwarn-textextracts-unsupportedmodel',
147 $page = WikiPage::factory(
$title );
// Try the cache first, using the 'intro' setting as part of the lookup.
149 $introOnly = $this->params[
'intro'];
150 $text = $this->getFromCache( $page, $introOnly );
// Intro requested but not cached: fall back to the cached full text, if any,
// and cut out its first section.
152 if ( $text ===
false && $introOnly ) {
153 $text = $this->getFromCache( $page,
false );
154 if ( $text !==
false ) {
155 $text = $this->getFirstSection( $text, $this->params[
'plaintext'] );
// Still nothing cached: parse the page, convert the result and store it in the cache.
158 if ( $text ===
false ) {
159 $text = $this->parse( $page );
160 $text = $this->convertText( $text );
161 $this->setCache( $page, $text );
// NOTE(review): fragment; one key component line is not visible here.
// Visible key components: the 'textextracts' prefix, CACHE_VERSION, the preferred variant
// of the page language, the 'plaintext' flag and the intro flag.
167 return $cache->makeKey(
'textextracts', self::CACHE_VERSION,
169 $page->
getTitle()->getPageLanguage()->getPreferredVariant(),
170 $this->params[
'plaintext'], $introOnly
// Build the lookup key for this page and intro flag, using the $wgMemc cache object
// (the read from the cache itself is not visible here).
177 $key = $this->cacheKey(
$wgMemc, $page, $introOnly );
// Key is derived from the page and the current 'intro' request parameter.
184 $key = $this->cacheKey(
$wgMemc, $page, $this->params[
'intro'] );
// Store the text; the third argument is the configured 'ParserCacheExpireTime' value.
185 $wgMemc->set( $key, $text, $this->getConfig()->
get(
'ParserCacheExpireTime' ) );
// NOTE(review): fragment; the condition choosing between the two patterns is not visible
// (presumably the plain-text flag passed by the callers). TODO confirm.
// Pattern 1: lazily capture everything before the first ExtractFormatter section marker.
190 $regexp =
'/^(.*?)(?=' . ExtractFormatter::SECTION_MARKER_START .
')/s';
// Pattern 2: lazily capture everything before the first HTML <h1>..<h6> opening tag.
192 $regexp =
'/^(.*?)(?=<h[1-6]\b)/s';
// On a match, $matches receives the captured leading text (its use is not visible here).
194 if ( preg_match( $regexp, $text,
$matches ) ) {
// NOTE(review): fragment; the method header and several intermediate lines are not visible here.
// Included in the failure log below under 'exception'; stays null unless assigned elsewhere.
207 $apiException =
null;
// Ask the parser cache for existing output for this page and these parser options.
212 $pout = MediaWikiServices::getInstance()->getParserCache()->get( $page, $parserOptions );
// Take the cached text with the 'unwrap' option set; for intro-only requests keep
// just the first section, using the HTML heading pattern (second argument false).
214 $text = $pout->getText( [
'unwrap' =>
true ] );
215 if ( $this->params[
'intro'] ) {
216 $text = $this->getFirstSection( $text,
false );
// Entries of the $request array for the parse call (only some entries are visible here).
223 'page' => $page->
getTitle()->getPrefixedText(),
226 'sectionpreview' => 1,
227 'wrapoutputclass' =>
'',
// Intro-only requests ask for section 0.
229 if ( $this->params[
'intro'] ) {
230 $request[
'section'] = 0;
236 $data = $api->getResult()->getResultData(
null, [
// If the caught exception carries 'apierror-nosuchsection', drop the section parameter;
// result data is read again afterwards (the repeated call itself is not visible here).
242 if ( $e->
getStatusValue()->hasMessage(
'apierror-nosuchsection' ) ) {
245 unset( $request[
'section'] );
248 $data = $api->getResult()->getResultData(
null, [
// No 'parse' key in the result: log a warning on the 'textextracts' channel with the
// page title, the request URL, the captured exception and the request parameters.
258 if ( !array_key_exists(
'parse', $data ) ) {
259 LoggerFactory::getInstance(
'textextracts' )->warning(
260 'API Parse request failed while generating text extract', [
261 'title' => $page->
getTitle()->getFullText(),
262 'url' => $this->getRequest()->getFullRequestURL(),
263 'exception' => $apiException,
264 'request' => $request
// Return the text stored under parse -> text -> * in the result data.
269 return $data[
'parse'][
'text'][
'*'];
/**
 * Creates an instance of this module wired to the 'textextracts' configuration.
 *
 * The configuration object is obtained from the config factory of the
 * MediaWikiServices singleton and handed to the constructor as third argument.
 *
 * @param mixed $query Forwarded unchanged as the constructor's first argument
 *  (presumably the parent query module; verify against the caller)
 * @param mixed $name Forwarded unchanged as the constructor's second argument
 *  (presumably the module name; verify against the caller)
 * @return self
 */
277 public static function factory( $query, $name ) {
278 $config = MediaWikiServices::getInstance()->getConfigFactory()->makeConfig(
'textextracts' );
279 return new self( $query, $name, $config );
// NOTE(review): fragment; construction of $fmt is not visible here.
// Pass the configured 'ExtractsRemoveClasses' value to the formatter's remove(),
// then take the formatter's resulting text.
289 $fmt->remove( $this->config->get(
'ExtractsRemoveClasses' ) );
290 $text = $fmt->getText();
// NOTE(review): fragment; construction of $truncator and the body of the first branch are not visible.
// Branch taken for non-plaintext output when MWTidy is enabled.
300 if ( !$this->params[
'plaintext'] && MWTidy::isEnabled() ) {
// Character limit: keep the first 'chars' characters and append the 'ellipsis' message text.
306 if ( $this->params[
'chars'] ) {
307 $text = $truncator->getFirstChars( $text, $this->params[
'chars'] ) .
308 $this->msg(
'ellipsis' )->text();
// Sentence limit: keep the first 'sentences' sentences; nothing is appended here.
309 } elseif ( $this->params[
'sentences'] ) {
310 $text = $truncator->getFirstSentences( $text, $this->params[
'sentences'] );
// NOTE(review): fragment; the start of the $pattern assignment and the case labels are not visible.
// Pattern tail: section-marker start, one digit (used below as the number of '=' signs),
// section-marker end, then the rest of the line as the heading text.
317 ExtractFormatter::SECTION_MARKER_START .
'(\d)' .
318 ExtractFormatter::SECTION_MARKER_END .
'(.*)/';
// Dispatch on the requested 'sectionformat'.
320 switch ( $this->params[
'sectionformat'] ) {
// Wiki-style heading: a newline, then the trimmed title with $matches[1] '=' signs on each side.
325 return preg_replace_callback( $pattern,
function (
$matches ) {
326 $bars = str_repeat(
'=',
$matches[1] );
327 return "\n$bars " . trim(
$matches[2] ) .
" $bars";
// Another format handled by a second callback (its body is not visible here).
331 return preg_replace_callback( $pattern,
function (
$matches ) {
// Thrown for an unexpected 'sectionformat' (the enclosing case/default label is not visible here).
336 throw new \LogicException(
'Invalid sectionformat' );
// NOTE(review): single visible entry of a parameter array; 'plaintext' maps to false
// (presumably its default value; the enclosing array is not visible here).
364 'plaintext' =>
false,
// Example query string mapped to the message key that describes it.
382 'action=query&prop=extracts&exchars=175&titles=Therion'
383 =>
'apihelp-query+extracts-example-1',
// Returns the URL of the extension's on-wiki page, anchored at its API section.
392 return 'https://www.mediawiki.org/wiki/Special:MyLanguage/Extension:TextExtracts#API';
wfEscapeWikiText( $text)
Escapes the given text so that it may be output using addWikiText() without any linking,...
This abstract class implements many basic API functions, and is the base of all API classes.
const PARAM_MAX2
(integer) Max value allowed for the parameter for users with the apihighlimits right,...
const PARAM_MAX
(integer) Max value allowed for the parameter, for PARAM_TYPE 'integer' and 'limit'.
const PARAM_TYPE
(string|string[]) Either an array of allowed value strings, or a string type as described below.
const PARAM_DFLT
(null|boolean|integer|string) Default value of the parameter.
const PARAM_MIN
(integer) Lowest value allowed for the parameter, for PARAM_TYPE 'integer' and 'limit'.
const PARAM_HELP_MSG
(string|array|Message) Specify an alternative i18n documentation message for this parameter.
This is the main API class, used for both external and internal processing.
This is a base class for all Query modules.
Exception used to abort API execution with an error.
getStatusValue()
Fetch the error status.
Class representing a cache/ephemeral data store.
WebRequest clone which takes values from a provided array.
Class to interact with and configure Remex tidy.
Set options of the Parser.
Represents a title within MediaWiki.
The User object encapsulates all of the user-specific settings (user_id, name, rights,...
Class representing a MediaWiki article and history.
shouldCheckParserCache(ParserOptions $parserOptions, $oldId)
Should the parser cache be used?
getTitle()
Get the title object of the article.
getTouched()
Get the page_touched field.
Interface for configuration instances.