Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
53.62% |
37 / 69 |
|
25.00% |
2 / 8 |
CRAP | |
0.00% |
0 / 1 |
| FancyTitleResultsType | |
53.62% |
37 / 69 |
|
25.00% |
2 / 8 |
75.77 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| getSourceFiltering | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getHighlightingConfiguration | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
2 | |||
| transformElasticsearchResult | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
| chooseBestTitleOrRedirect | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| createEmptyResult | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| transformOneElasticResult | |
86.67% |
26 / 30 |
|
0.00% |
0 / 1 |
10.24 | |||
| resolveRedirectHighlight | |
80.00% |
8 / 10 |
|
0.00% |
0 / 1 |
6.29 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace CirrusSearch\Search; |
| 4 | |
| 5 | use Elastica\ResultSet as ElasticaResultSet; |
| 6 | use MediaWiki\Logger\LoggerFactory; |
| 7 | use MediaWiki\Title\Title; |
| 8 | |
| 9 | /** |
| 10 | * Returns titles categorized based on how they matched - redirect or name. |
| 11 | */ |
class FancyTitleResultsType extends TitleResultsType {
	/** @var string Analyzer suffix of the title / redirect.title subfields that were queried */
	private $matchedAnalyzer;

	/**
	 * Build result type. The matchedAnalyzer is required to detect if the match
	 * was from the title or a redirect (and is kind of a leaky abstraction.)
	 *
	 * @param string $matchedAnalyzer the analyzer used to match the title
	 * @param TitleHelper|null $titleHelper
	 */
	public function __construct( $matchedAnalyzer, ?TitleHelper $titleHelper = null ) {
		parent::__construct( $titleHelper );
		$this->matchedAnalyzer = $matchedAnalyzer;
	}

	/** @inheritDoc */
	public function getSourceFiltering() {
		// 'redirect' is needed so that highlighted redirect titles can be mapped
		// back to the namespace of the redirect in transformOneElasticResult().
		return [ 'namespace', 'title', 'namespace_text', 'wiki', 'redirect' ];
	}

	/**
	 * Build the highlighting configuration used to find out which field matched.
	 *
	 * The highlighted text itself is not shown to the user (pre/post tags are
	 * empty); only the presence of a highlight per field is used.
	 *
	 * @param array $extraHighlightFields Unused by this result type
	 * @return array|null
	 */
	public function getHighlightingConfiguration( array $extraHighlightFields = [] ) {
		// Titles are single valued: return the whole value, unfragmented.
		$entireValue = [
			'type' => 'unified',
			'number_of_fragments' => 0,
		];
		// Redirects are multi valued: one fragment per matched redirect.
		$manyValues = [
			'type' => 'unified',
			'fragment_size' => 10000, // We want the whole value but more than this is crazy
			'number_of_fragments' => 30,
			'order' => 'score',
		];
		return [
			// we don't really care about the actual portion of the title that matched, the UI
			// is generally responsible for doing this.
			'pre_tags' => [ "" ],
			'post_tags' => [ "" ],
			'fields' => [
				"title.$this->matchedAnalyzer" => $entireValue,
				"title.{$this->matchedAnalyzer}_asciifolding" => $entireValue,
				"redirect.title.$this->matchedAnalyzer" => $manyValues,
				"redirect.title.{$this->matchedAnalyzer}_asciifolding" => $manyValues,
			],
		];
	}

	/**
	 * Convert the results to titles.
	 *
	 * @param ElasticaResultSet $resultSet
	 * @return array[] Array of arrays, each with optional keys:
	 *   titleMatch => a title if the title matched
	 *   redirectMatches => an array of redirect matches, one per matched redirect
	 */
	public function transformElasticsearchResult( ElasticaResultSet $resultSet ) {
		$results = [];
		foreach ( $resultSet->getResults() as $r ) {
			$results[] = $this->transformOneElasticResult( $r );
		}
		return $results;
	}

	/**
	 * Finds best title or redirect
	 *
	 * @param array $match array returned by self::transformOneElasticResult
	 * @return Title|false The title match if present, otherwise the first redirect
	 *  match, otherwise false
	 */
	public static function chooseBestTitleOrRedirect( array $match ) {
		// TODO maybe dig around in the redirect matches and find the best one?
		return $match['titleMatch'] ?? $match['redirectMatches'][0] ?? false;
	}

	/**
	 * @return array
	 */
	public function createEmptyResult() {
		return [];
	}

	/**
	 * Transform a result from elastic into an array of Titles.
	 *
	 * @param \Elastica\Result $r
	 * @param int[] $namespaces Preferred namespaces to pick a redirect from when the
	 *  same redirect text exists in several namespaces
	 * @return array Array with the following optional keys (at least one is set):
	 *   titleMatch => (Title) the page title, if the title matched
	 *   redirectMatches => (Title[]) redirect matches, one per highlighted redirect
	 */
	public function transformOneElasticResult( \Elastica\Result $r, array $namespaces = [] ) {
		$title = $this->getTitleHelper()->makeTitle( $r );
		$highlights = $r->getHighlights();
		$resultForTitle = [];

		$titleField = "title.$this->matchedAnalyzer";
		$titleFoldedField = "title.{$this->matchedAnalyzer}_asciifolding";
		$redirectField = "redirect.title.$this->matchedAnalyzer";
		$redirectFoldedField = "redirect.title.{$this->matchedAnalyzer}_asciifolding";

		// Now we have to use the highlights to figure out whether it was the title or the redirect
		// that matched. It is kind of a shame we can't really give the highlighting to the client
		// though.
		if ( isset( $highlights[$titleField] ) || isset( $highlights[$titleFoldedField] ) ) {
			$resultForTitle['titleMatch'] = $title;
		}

		$redirectHighlights = [];
		if ( isset( $highlights[$redirectField] ) ) {
			$redirectHighlights = $highlights[$redirectField];
		}
		if ( isset( $highlights[$redirectFoldedField] ) ) {
			$redirectHighlights = array_merge( $redirectHighlights, $highlights[$redirectFoldedField] );
		}

		if ( $redirectHighlights !== [] ) {
			// Index the redirects of the source document by title text; the same
			// text may appear several times with different namespaces.
			$source = $r->getSource();
			$docRedirects = [];
			if ( isset( $source['redirect'] ) ) {
				foreach ( $source['redirect'] as $docRedir ) {
					$docRedirects[$docRedir['title']][] = $docRedir;
				}
			}
			foreach ( $redirectHighlights as $redirectTitleString ) {
				$resultForTitle['redirectMatches'][] = $this->resolveRedirectHighlight(
					$r, $redirectTitleString, $docRedirects, $namespaces );
			}
		}

		if ( $resultForTitle === [] ) {
			// We're not really sure where the match came from so lets just pretend it was the title.
			LoggerFactory::getInstance( 'CirrusSearch' )
				->warning( "Title search result type hit a match but we can't " .
					"figure out what caused the match: {namespace}:{title}",
					[ 'namespace' => $r->namespace, 'title' => $r->title ] );
			$resultForTitle['titleMatch'] = $title;
		}

		return $resultForTitle;
	}

	/**
	 * Build the Title of a redirect from its highlighted text, choosing a namespace for it.
	 *
	 * @param \Elastica\Result $r Elasticsearch result
	 * @param string $redirectTitleString Highlighted string returned from elasticsearch
	 * @param array $docRedirects Map from title string to list of redirects from elasticsearch source document
	 * @param int[] $namespaces Preferred namespaces to source redirects from
	 * @return Title
	 */
	private function resolveRedirectHighlight( \Elastica\Result $r, $redirectTitleString, array $docRedirects, $namespaces ) {
		// The match was against a redirect so we should replace the $title with one that
		// represents the redirect.
		if ( !isset( $docRedirects[$redirectTitleString] ) ) {
			// Instead of getting the redirect's real namespace we're going to just use the namespace
			// of the title. This is not great.
			// TODO: Should we just bail at this point?
			return $this->getTitleHelper()->makeRedirectTitle( $r, $redirectTitleString, $r->namespace );
		}

		$redirs = $docRedirects[$redirectTitleString];
		if ( count( $redirs ) === 1 ) {
			// may or may not be the right namespace, but we don't seem to have any other options.
			return $this->getTitleHelper()->makeRedirectTitle( $r, $redirectTitleString, $redirs[0]['namespace'] );
		}

		if ( $namespaces ) {
			foreach ( $redirs as $redir ) {
				// in_array() rather than a truthiness test on array_search(): the latter
				// returns the matching key, which is 0 (falsy) when the namespace is the
				// first preferred one, so that match used to be silently ignored.
				// Comparison is deliberately kept non-strict, as it was with array_search().
				if ( in_array( $redir['namespace'], $namespaces ) ) {
					return $this->getTitleHelper()->makeRedirectTitle( $r, $redirectTitleString, $redir['namespace'] );
				}
			}
		}
		// Multiple redirects with same text from different namespaces, but none of them match the requested namespaces. What now?
		return $this->getTitleHelper()->makeRedirectTitle( $r, $redirectTitleString, $redirs[0]['namespace'] );
	}
}