Code Coverage

| | Lines (%) | Lines (covered / total) | Functions and Methods (%) | Functions and Methods (covered / total) | CRAP | Classes and Traits (%) | Classes and Traits (covered / total) |
|---|---|---|---|---|---|---|---|
| Total | 97.26% | 71 / 73 | 75.00% | 3 / 4 | | 0.00% | 0 / 1 |
| TitleMatcher | 97.26% | 71 / 73 | 75.00% | 3 / 4 | 37 | 0.00% | 0 / 1 |
| `__construct` | 100.00% | 6 / 6 | 100.00% | 1 / 1 | 1 | | |
| `getNearMatch` | 100.00% | 3 / 3 | 100.00% | 1 / 1 | 1 | | |
| `getNearMatchResultSet` | 100.00% | 1 / 1 | 100.00% | 1 / 1 | 1 | | |
| `getNearMatchInternal` | 96.83% | 61 / 63 | 0.00% | 0 / 1 | 34 | | |

| 1 | <?php |
| 2 | namespace MediaWiki\Search; |
| 3 | |
| 4 | use MediaWiki\FileRepo\RepoGroup; |
| 5 | use MediaWiki\HookContainer\HookContainer; |
| 6 | use MediaWiki\HookContainer\HookRunner; |
| 7 | use MediaWiki\Language\ILanguageConverter; |
| 8 | use MediaWiki\Language\Language; |
| 9 | use MediaWiki\Languages\LanguageConverterFactory; |
| 10 | use MediaWiki\Page\WikiPageFactory; |
| 11 | use MediaWiki\Title\Title; |
| 12 | use MediaWiki\Title\TitleFactory; |
| 13 | use UtfNormal\Validator; |
| 14 | |
/**
 * Service implementation of near match title search.
 *
 * Given a raw search term, looks for a title that matches it directly or that
 * differs only in letter case or Unicode normalization form, and lets hook
 * handlers intervene at several points along the way.
 */
class TitleMatcher {

	private Language $language;
	private ILanguageConverter $languageConverter;
	private HookRunner $hookRunner;
	private WikiPageFactory $wikiPageFactory;
	private RepoGroup $repoGroup;
	private TitleFactory $titleFactory;

	/**
	 * @param Language $contentLanguage Language used for the case conversions; also the
	 *  language whose converter is fetched from $languageConverterFactory
	 * @param LanguageConverterFactory $languageConverterFactory Source of the variant converter
	 * @param HookContainer $hookContainer Wrapped in a HookRunner to fire the search hooks
	 * @param WikiPageFactory $wikiPageFactory Used to ask whether a title has viewable content
	 * @param RepoGroup $repoGroup Used to look up files for titles in the File namespace
	 * @param TitleFactory $titleFactory Used to build every Title this class creates
	 */
	public function __construct(
		Language $contentLanguage,
		LanguageConverterFactory $languageConverterFactory,
		HookContainer $hookContainer,
		WikiPageFactory $wikiPageFactory,
		RepoGroup $repoGroup,
		TitleFactory $titleFactory
	) {
		$this->language = $contentLanguage;
		$this->languageConverter = $languageConverterFactory->getLanguageConverter( $contentLanguage );
		$this->hookRunner = new HookRunner( $hookContainer );
		$this->wikiPageFactory = $wikiPageFactory;
		$this->repoGroup = $repoGroup;
		$this->titleFactory = $titleFactory;
	}

	/**
	 * If an exact title match can be found, or a very slightly close match,
	 * return the title. If no match, returns NULL.
	 *
	 * The SearchGetNearMatchComplete hook is run with the search term and the
	 * result before returning, whether or not a match was found.
	 *
	 * @param string $searchterm
	 * @return Title|null
	 */
	public function getNearMatch( $searchterm ) {
		$title = $this->getNearMatchInternal( $searchterm );

		$this->hookRunner->onSearchGetNearMatchComplete( $searchterm, $title );
		return $title;
	}

	/**
	 * Do a near match (see SearchEngine::getNearMatch) and wrap it into a
	 * ISearchResultSet.
	 *
	 * @param string $searchterm
	 * @return ISearchResultSet
	 */
	public function getNearMatchResultSet( $searchterm ) {
		return new SearchNearMatchResultSet( $this->getNearMatch( $searchterm ) );
	}

	/**
	 * Really find the title match.
	 *
	 * The search term, plus its language variants when the content language
	 * has any, are tried in order. For each term: a direct match first, then
	 * the spelling variants from spellingVariants(), then the
	 * SearchGetNearMatch hook. If nothing matched, a few namespace-specific
	 * fallbacks are applied to the original term, and finally a fully quoted
	 * term is retried without its quotes.
	 *
	 * @param string $searchterm
	 * @return null|Title
	 */
	protected function getNearMatchInternal( $searchterm ) {
		$allSearchTerms = [ $searchterm ];

		if ( $this->languageConverter->hasVariants() ) {
			$allSearchTerms = array_unique( array_merge(
				$allSearchTerms,
				$this->languageConverter->autoConvertToAllVariants( $searchterm )
			) );
		}

		// A handler returning false short-circuits the whole search with whatever it put in $titleResult.
		$titleResult = null;
		if ( !$this->hookRunner->onSearchGetNearMatchBefore( $allSearchTerms, $titleResult ) ) {
			return $titleResult;
		}

		// Most of our handling here deals with finding a valid title for the search term,
		// but almost anything starting with '#' is "valid" and points to Main_Page#searchterm.
		// Rather than doing something completely wrong, do nothing.
		if ( $searchterm === '' || $searchterm[0] === '#' ) {
			return null;
		}

		foreach ( $allSearchTerms as $term ) {
			# Exact match? No need to look further.
			$title = $this->titleFactory->newFromText( $term );
			if ( $title === null ) {
				// A term that cannot be turned into a title ends the search; later terms are not tried.
				return null;
			}

			# Try files if searching in the Media: namespace
			if ( $title->getNamespace() === NS_MEDIA ) {
				$title = $this->titleFactory->makeTitle( NS_FILE, $title->getText() );
			}

			if ( $title->isSpecialPage() || $title->isExternal() || $title->exists() ) {
				return $title;
			}

			# See if it still otherwise has content in some sensible sense
			if ( $title->canExist() ) {
				$page = $this->wikiPageFactory->newFromTitle( $title );
				if ( $page->hasViewableContent() ) {
					return $title;
				}
			}

			// A handler returning false ends the search with the current value of $title.
			if ( !$this->hookRunner->onSearchAfterNoDirectMatch( $term, $title ) ) {
				return $title;
			}

			# Try differently-cased and normalized spellings of the term
			$title = $this->findExistingSpellingVariant( $term );
			if ( $title !== null ) {
				return $title;
			}

			// Give hooks a chance at better match variants
			$title = null;
			// @phan-suppress-next-line PhanTypeMismatchArgument Type mismatch on pass-by-ref args
			if ( !$this->hookRunner->onSearchGetNearMatch( $term, $title ) ) {
				return $title;
			}
		}

		// $searchterm is normally the first entry tried in the loop above and was already
		// turned into a title there, so the throwing variant is not expected to throw here
		// (assuming the SearchGetNearMatchBefore handlers left the term list intact).
		$title = $this->titleFactory->newFromTextThrow( $searchterm );

		# Entering a user goes to the user page whether it's there or not
		if ( $title->getNamespace() === NS_USER ) {
			return $title;
		}

		# Go to images that exist even if there's no local page.
		# There may have been a funny upload, or it may be on a shared
		# file repository such as Wikimedia Commons.
		if ( $title->getNamespace() === NS_FILE ) {
			$image = $this->repoGroup->findFile( $title );
			if ( $image ) {
				return $title;
			}
		}

		# MediaWiki namespace? Page may be "implied" if not customized.
		# Just return it, with caps forced as the message system likes it.
		if ( $title->getNamespace() === NS_MEDIAWIKI ) {
			return $this->titleFactory->makeTitle(
				NS_MEDIAWIKI,
				$this->language->ucfirst( $title->getText() )
			);
		}

		# Quoted term? Try without the quotes...
		# This goes through getNearMatch(), so the SearchGetNearMatchComplete hook
		# also runs for the unquoted term.
		$matches = [];
		if ( preg_match( '/^"([^"]+)"$/', $searchterm, $matches ) ) {
			return $this->getNearMatch( $matches[1] );
		}

		return null;
	}

	/**
	 * Return the first spelling variant of $term that names an existing title.
	 *
	 * @param string $term
	 * @return Title|null Null when no variant yields an existing title
	 */
	private function findExistingSpellingVariant( $term ): ?Title {
		foreach ( $this->spellingVariants( $term ) as $candidate ) {
			$title = $this->titleFactory->newFromText( $candidate );
			if ( $title && $title->exists() ) {
				return $title;
			}
		}

		return null;
	}

	/**
	 * Lazily produce the alternative spellings of $term, in the order they are tried.
	 *
	 * This is a generator, so each spelling is only computed if every earlier
	 * one failed to match.
	 *
	 * @param string $term
	 * @return iterable<string>
	 */
	private function spellingVariants( $term ): iterable {
		# All lower case (=> first letter capitalized on some wikis)
		yield $this->language->lc( $term );

		# The NFKC-normalized forms are only worth trying if normalization changed something
		$normTerm = Validator::toNFKC( $term );
		$normDiff = $normTerm !== $term;
		if ( $normDiff ) {
			yield $this->language->lc( $normTerm );
		}

		# Capitalized string
		yield $this->language->ucwords( $term );
		if ( $normDiff ) {
			yield $this->language->ucwords( $normTerm );
		}

		# All upper case
		yield $this->language->uc( $term );

		# Word-Caps-Breaking-At-Word-Breaks, for hyphenated names etc
		yield $this->language->ucwordbreaks( $term );
	}
}