Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 49 |
|
0.00% |
0 / 5 |
CRAP | |
0.00% |
0 / 1 |
Hooks | |
0.00% |
0 / 49 |
|
0.00% |
0 / 5 |
342 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
trimExtract | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
12 | |||
getExtractsData | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
6 | |||
onApiOpenSearchSuggest | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
42 | |||
onSearchResultProvideDescription | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
42 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\TextExtracts; |
4 | |
5 | use Generator; |
6 | use MediaWiki\Api\ApiMain; |
7 | use MediaWiki\Api\ApiResult; |
8 | use MediaWiki\Api\Hook\ApiOpenSearchSuggestHook; |
9 | use MediaWiki\Config\Config; |
10 | use MediaWiki\Config\ConfigFactory; |
11 | use MediaWiki\Request\FauxRequest; |
12 | use MediaWiki\Rest\Hook\SearchResultProvideDescriptionHook; |
13 | |
/**
 * Hook handlers that add plain-text page extracts to OpenSearch suggestions
 * and to REST search result descriptions.
 *
 * @license GPL-2.0-or-later
 */
class Hooks implements
	ApiOpenSearchSuggestHook,
	SearchResultProvideDescriptionHook
{

	/**
	 * Number of page IDs sent per internal API request. Used both as the chunk
	 * size and (via count()) as the 'exlimit' value of each request.
	 */
	private const EXTRACTS_CHUNK_SIZE = 20;

	/** Target length handed to trimExtract() for REST search descriptions. */
	private const DESCRIPTION_LENGTH = 150;

	private Config $config;

	/**
	 * @param ConfigFactory $configFactory Used to obtain the 'textextracts' config instance
	 */
	public function __construct(
		ConfigFactory $configFactory
	) {
		$this->config = $configFactory->makeConfig( 'textextracts' );
	}

	/**
	 * Trim an extract to a sensible length.
	 *
	 * Adapted from Extension:OpenSearchXml, which adapted it from
	 * Extension:ActiveAbstract.
	 *
	 * The match is anchored at the start of the text and "." does not match
	 * newlines, so only the first line can ever be trimmed this way. When no
	 * sentence end is found after $length characters, the whole first line is
	 * returned instead.
	 *
	 * @param string $text
	 * @param int $length Target length; actual result will continue to the end of a sentence.
	 * @return string
	 */
	private static function trimExtract( string $text, int $length ): string {
		// Cache one pattern per target length. A single shared pattern would
		// silently keep using the length of whichever call happened first.
		static $regexes = [];

		// A negative repetition count is not a valid quantifier.
		$length = max( 0, $length );

		if ( !isset( $regexes[$length] ) ) {
			// Non-ASCII terminators are written as code-point escapes so the
			// pattern does not depend on how this file is encoded or normalized.
			// A bare "." or "?" here would match anything or break the pattern.
			$endchars = [
				// regular ASCII; a full stop right after a digit does not end a sentence
				'[^\d]\.\s', '\!\s', '\?\s',
				// full-width ideographic full-stop (U+3002)
				'\x{3002}',
				// double-width roman forms (U+FF0E, U+FF01, U+FF1F)
				'\x{FF0E}', '\x{FF01}', '\x{FF1F}',
				// half-width ideographic full stop (U+FF61)
				'\x{FF61}',
			];
			$end = '(?:' . implode( '|', $endchars ) . ')';
			// Lazily consume at least $length characters, then stop at the
			// first run of sentence terminators.
			$regexes[$length] = '/^(.{' . $length . ',}?' . $end . '+)/u';
		}

		$matches = [];
		if ( preg_match( $regexes[$length], $text, $matches ) ) {
			return trim( $matches[1] );
		}

		// Just return the first line
		return trim( explode( "\n", $text )[0] );
	}

	/**
	 * Retrieves extracts data for the given page IDs from the TextExtract API.
	 * The page IDs are chunked into the max limit of exlimit of the TextExtract API
	 *
	 * @param array $pageIds An array of page IDs to retrieve extracts for
	 * @return Generator Yields the result data from the API request, once per chunk
	 *  $data = [
	 *    'pageId' => [
	 *      'ns' => int of the namespace
	 *      'title' => string of the title of the page
	 *      'extract' => string of the text extracts of the page
	 *    ]
	 *  ]
	 */
	private function getExtractsData( array $pageIds ): Generator {
		foreach ( array_chunk( $pageIds, self::EXTRACTS_CHUNK_SIZE ) as $chunkedPageIds ) {
			$api = new ApiMain( new FauxRequest(
				[
					'action' => 'query',
					'prop' => 'extracts',
					'explaintext' => true,
					'exintro' => true,
					'exlimit' => count( $chunkedPageIds ),
					'pageids' => implode( '|', $chunkedPageIds ),
				]
			) );
			$api->execute();
			yield $api->getResult()->getResultData( [ 'query', 'pages' ] );
		}
	}

	/**
	 * Pull the extract text for one page out of a chunk of API result data.
	 *
	 * The text is stored under the key named by the ApiResult::META_CONTENT
	 * entry of the 'extract' array, falling back to '*' when that entry is absent.
	 *
	 * @param array|null $pages One value yielded by getExtractsData()
	 * @param int|string $pageId
	 * @return string|null Null when this chunk holds no extract for the page
	 */
	private static function getExtractText( $pages, $pageId ) {
		$extract = $pages[$pageId]['extract'] ?? null;
		if ( !is_array( $extract ) ) {
			return null;
		}

		$contentKey = $extract[ApiResult::META_CONTENT] ?? '*';
		return $extract[$contentKey] ?? null;
	}

	/**
	 * ApiOpenSearchSuggest hook handler
	 *
	 * Does nothing unless the 'ExtractsExtendOpenSearchXml' setting is enabled.
	 *
	 * @param array &$results Array of search results, keyed by page ID
	 */
	public function onApiOpenSearchSuggest( &$results ) {
		if ( !$this->config->get( 'ExtractsExtendOpenSearchXml' ) || $results === [] ) {
			return;
		}

		$pageIds = array_keys( $results );
		foreach ( $this->getExtractsData( $pageIds ) as $pages ) {
			// Each chunk only covers some of the IDs; the rest yield null and are skipped.
			foreach ( $pageIds as $id ) {
				$extract = self::getExtractText( $pages, $id );
				if ( $extract !== null ) {
					$results[$id]['extract'] = $extract;
					$results[$id]['extract trimmed'] = false;
				}
			}
		}
	}

	/**
	 * Used to update Search Results with descriptions for Search Engine.
	 *
	 * Does nothing unless the 'ExtractsExtendRestSearch' setting is enabled.
	 *
	 * @param array $pageIdentities Array (string=>SearchResultPageIdentity) where key is pageId
	 * @param array &$descriptions Output array (string=>string|null)
	 * where key is pageId and value is either a description for given page or null
	 */
	public function onSearchResultProvideDescription(
		array $pageIdentities,
		&$descriptions
	): void {
		if ( !$this->config->get( 'ExtractsExtendRestSearch' ) || $pageIdentities === [] ) {
			return;
		}

		$pageIds = array_map( static function ( $identity ) {
			return $identity->getId();
		}, $pageIdentities );
		foreach ( $this->getExtractsData( $pageIds ) as $pages ) {
			// Each chunk only covers some of the IDs; the rest yield null and are skipped.
			foreach ( $pageIds as $id ) {
				$extract = self::getExtractText( $pages, $id );
				if ( $extract !== null ) {
					$descriptions[$id] = self::trimExtract( $extract, self::DESCRIPTION_LENGTH );
				}
			}
		}
	}
}