29 const CACHE_VERSION = 2;
44 private $supportedContentModels = [
'wikitext' ];
52 parent::__construct( $query, $moduleName, self::PREFIX );
53 $this->config = $conf;
62 $titles = $this->getPageSet()->getGoodTitles();
63 if ( $titles === [] ) {
66 $isXml = $this->getMain()->isInternalMode()
67 || $this->getMain()->getPrinter()->getFormat() ==
'XML';
68 $result = $this->getResult();
69 $params = $this->params = $this->extractRequestParams();
70 $this->requireMaxOneParameter( $params,
'chars',
'sentences' );
72 $limit = intval( $params[
'limit'] );
73 if ( $limit > 1 && !$params[
'intro'] ) {
75 $this->addWarning( [
'apiwarn-textextracts-limit', $limit ] );
77 if ( isset( $params[
'continue'] ) ) {
78 $continue = intval( $params[
'continue'] );
79 $this->dieContinueUsageIf( $continue < 0 || $continue > count( $titles ) );
80 $titles = array_slice( $titles, $continue,
null,
true );
83 $titleInFileNamespace =
false;
85 foreach ( $titles as $id =>
$t ) {
86 if ( ++$count > $limit ) {
87 $this->setContinueEnumParameter(
'continue', $continue + $count - 1 );
93 $titleInFileNamespace =
true;
95 $params = $this->params;
96 $text = $this->getExtract(
$t );
97 $text = $this->truncate( $text );
98 if ( $params[
'plaintext'] ) {
99 $text = $this->doSections( $text );
101 if ( $params[
'sentences'] ) {
102 $this->addWarning( $this->msg(
'apiwarn-textextracts-sentences-and-html', self::PREFIX ) );
104 $this->addWarning(
'apiwarn-textextracts-malformed-html' );
109 $fit = $result->addValue( [
'query',
'pages', $id ],
'extract', [
'*' => $text ] );
111 $fit = $result->addValue( [
'query',
'pages', $id ],
'extract', $text );
114 $this->setContinueEnumParameter(
'continue', $continue + $count - 1 );
118 if ( $titleInFileNamespace ) {
119 $this->addWarning(
'apiwarn-textextracts-title-in-file-namespace' );
137 $contentModel =
$title->getContentModel();
138 if ( !in_array( $contentModel, $this->supportedContentModels,
true ) ) {
140 'apiwarn-textextracts-unsupportedmodel',
149 $introOnly = $this->params[
'intro'];
150 $text = $this->getFromCache( $page, $introOnly );
152 if ( $text ===
false && $introOnly ) {
153 $text = $this->getFromCache( $page,
false );
154 if ( $text !==
false ) {
155 $text = $this->getFirstSection( $text, $this->params[
'plaintext'] );
158 if ( $text ===
false ) {
159 $text = $this->parse( $page );
160 $text = $this->convertText( $text );
161 $this->setCache( $page, $text );
167 return $cache->makeKey(
'textextracts', self::CACHE_VERSION,
169 $page->
getTitle()->getPageLanguage()->getPreferredVariant(),
170 $this->params[
'plaintext'], $introOnly
177 $key = $this->cacheKey(
$wgMemc, $page, $introOnly );
184 $key = $this->cacheKey(
$wgMemc, $page, $this->params[
'intro'] );
185 $wgMemc->set( $key, $text, $this->getConfig()->
get(
'ParserCacheExpireTime' ) );
190 $regexp =
'/^(.*?)(?=' . ExtractFormatter::SECTION_MARKER_START .
')/s';
192 $regexp =
'/^(.*?)(?=<h[1-6]\b)/s';
194 if ( preg_match( $regexp, $text,
$matches ) ) {
207 $apiException =
null;
212 $pout = MediaWikiServices::getInstance()->getParserCache()->get( $page, $parserOptions );
214 $text = $pout->getText( [
'unwrap' =>
true ] );
215 if ( $this->params[
'intro'] ) {
216 $text = $this->getFirstSection( $text,
false );
223 'page' => $page->
getTitle()->getPrefixedText(),
226 'sectionpreview' => 1,
227 'wrapoutputclass' =>
'',
229 if ( $this->params[
'intro'] ) {
230 $request[
'section'] = 0;
236 $data = $api->getResult()->getResultData(
null, [
242 if ( $e->
getStatusValue()->hasMessage(
'apierror-nosuchsection' ) ) {
245 unset( $request[
'section'] );
248 $data = $api->getResult()->getResultData(
null, [
258 if ( !array_key_exists(
'parse', $data ) ) {
259 LoggerFactory::getInstance(
'textextracts' )->warning(
260 'API Parse request failed while generating text extract', [
261 'title' => $page->
getTitle()->getFullText(),
262 'url' => $this->getRequest()->getFullRequestURL(),
263 'exception' => $apiException,
264 'request' => $request
269 return $data[
'parse'][
'text'][
'*'];
277 public static function factory( $query, $name ) {
278 $config = MediaWikiServices::getInstance()->getConfigFactory()->makeConfig(
'textextracts' );
279 return new self( $query, $name, $config );
289 $fmt->remove( $this->config->get(
'ExtractsRemoveClasses' ) );
290 $text = $fmt->getText();
306 if ( $this->params[
'chars'] ) {
307 $text = $truncator->getFirstChars( $text, $this->params[
'chars'] ) .
308 $this->msg(
'ellipsis' )->text();
309 } elseif ( $this->params[
'sentences'] ) {
310 $text = $truncator->getFirstSentences( $text, $this->params[
'sentences'] );
317 ExtractFormatter::SECTION_MARKER_START .
'(\d)' .
318 ExtractFormatter::SECTION_MARKER_END .
'(.*)/';
320 switch ( $this->params[
'sectionformat'] ) {
325 return preg_replace_callback( $pattern,
function (
$matches ) {
326 $bars = str_repeat(
'=',
$matches[1] );
327 return "\n$bars " . trim(
$matches[2] ) .
" $bars";
331 return preg_replace_callback( $pattern,
function (
$matches ) {
336 throw new \LogicException(
'Invalid sectionformat' );
364 'plaintext' =>
false,
382 'action=query&prop=extracts&exchars=175&titles=Therion'
383 =>
'apihelp-query+extracts-example-1',
392 return 'https://www.mediawiki.org/wiki/Special:MyLanguage/Extension:TextExtracts#API';