Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
97.26% covered (success)
97.26%
71 / 73
75.00% covered (warning)
75.00%
3 / 4
CRAP
0.00% covered (danger)
0.00%
0 / 1
TitleMatcher
97.26% covered (success)
97.26%
71 / 73
75.00% covered (warning)
75.00%
3 / 4
37
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
1
 getNearMatch
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 getNearMatchResultSet
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getNearMatchInternal
96.83% covered (success)
96.83%
61 / 63
0.00% covered (danger)
0.00%
0 / 1
34
1<?php
2namespace MediaWiki\Search;
3
4use MediaWiki\FileRepo\RepoGroup;
5use MediaWiki\HookContainer\HookContainer;
6use MediaWiki\HookContainer\HookRunner;
7use MediaWiki\Language\ILanguageConverter;
8use MediaWiki\Language\Language;
9use MediaWiki\Languages\LanguageConverterFactory;
10use MediaWiki\Page\WikiPageFactory;
11use MediaWiki\Title\Title;
12use MediaWiki\Title\TitleFactory;
13use UtfNormal\Validator;
14
/**
 * Service implementation of near match title search.
 *
 * Resolves a search term to a single Title by trying the term itself (plus
 * its language variants when the content language converter has any) and
 * then a fixed sequence of case / Unicode-normalization variations, with
 * several hooks consulted along the way.
 */
class TitleMatcher {

    private Language $language;
    private ILanguageConverter $languageConverter;
    private HookRunner $hookRunner;
    private WikiPageFactory $wikiPageFactory;
    private RepoGroup $repoGroup;
    private TitleFactory $titleFactory;

    /**
     * @param Language $contentLanguage Language used for case transformations
     *  and for obtaining the language converter
     * @param LanguageConverterFactory $languageConverterFactory
     * @param HookContainer $hookContainer Wrapped in a HookRunner
     * @param WikiPageFactory $wikiPageFactory
     * @param RepoGroup $repoGroup Used to look up files for File: titles
     * @param TitleFactory $titleFactory
     */
    public function __construct(
        Language $contentLanguage,
        LanguageConverterFactory $languageConverterFactory,
        HookContainer $hookContainer,
        WikiPageFactory $wikiPageFactory,
        RepoGroup $repoGroup,
        TitleFactory $titleFactory
    ) {
        $this->language = $contentLanguage;
        $this->languageConverter = $languageConverterFactory->getLanguageConverter( $contentLanguage );
        $this->hookRunner = new HookRunner( $hookContainer );
        $this->wikiPageFactory = $wikiPageFactory;
        $this->repoGroup = $repoGroup;
        $this->titleFactory = $titleFactory;
    }

    /**
     * If an exact title match can be found, or a very slightly close match,
     * return the title. If no match, returns null.
     *
     * The SearchGetNearMatchComplete hook is run with the search term and the
     * outcome before the outcome is returned.
     *
     * @param string $searchterm
     * @return Title|null
     */
    public function getNearMatch( $searchterm ) {
        $title = $this->getNearMatchInternal( $searchterm );

        $this->hookRunner->onSearchGetNearMatchComplete( $searchterm, $title );
        return $title;
    }

    /**
     * Do a near match (see SearchEngine::getNearMatch) and wrap it into a
     * ISearchResultSet.
     *
     * @param string $searchterm
     * @return ISearchResultSet
     */
    public function getNearMatchResultSet( $searchterm ) {
        return new SearchNearMatchResultSet( $this->getNearMatch( $searchterm ) );
    }

    /**
     * Really find the title match.
     *
     * @param string $searchterm
     * @return null|Title
     */
    protected function getNearMatchInternal( $searchterm ) {
        $allSearchTerms = [ $searchterm ];

        if ( $this->languageConverter->hasVariants() ) {
            $allSearchTerms = array_unique( array_merge(
                $allSearchTerms,
                $this->languageConverter->autoConvertToAllVariants( $searchterm )
            ) );
        }

        // A handler returning false short-circuits the whole search with
        // whatever ended up in $titleResult.
        $titleResult = null;
        if ( !$this->hookRunner->onSearchGetNearMatchBefore( $allSearchTerms, $titleResult ) ) {
            return $titleResult;
        }

        // Most of our handling here deals with finding a valid title for the search term,
        // but almost anything starting with '#' is "valid" and points to Main_Page#searchterm.
        // Rather than doing something completely wrong, do nothing.
        if ( $searchterm === '' || $searchterm[0] === '#' ) {
            return null;
        }

        foreach ( $allSearchTerms as $term ) {
            # Exact match? No need to look further.
            $title = $this->titleFactory->newFromText( $term );
            if ( $title === null ) {
                return null;
            }

            # Try files if searching in the Media: namespace
            if ( $title->getNamespace() === NS_MEDIA ) {
                $title = $this->titleFactory->makeTitle( NS_FILE, $title->getText() );
            }

            if ( $title->isSpecialPage() || $title->isExternal() || $title->exists() ) {
                return $title;
            }

            # See if it still otherwise has content in some sensible sense
            if ( $title->canExist() ) {
                $page = $this->wikiPageFactory->newFromTitle( $title );
                if ( $page->hasViewableContent() ) {
                    return $title;
                }
            }

            if ( !$this->hookRunner->onSearchAfterNoDirectMatch( $term, $title ) ) {
                return $title;
            }

            # No direct hit: try the case / normalization variations of the term
            $title = $this->findExistingCaseVariant( $term );
            if ( $title !== null ) {
                return $title;
            }

            // Give hooks a chance at better match variants
            $title = null;
            // @phan-suppress-next-line PhanTypeMismatchArgument Type mismatch on pass-by-ref args
            if ( !$this->hookRunner->onSearchGetNearMatch( $term, $title ) ) {
                return $title;
            }
        }

        $title = $this->titleFactory->newFromTextThrow( $searchterm );

        # Entering a user goes to the user page whether it's there or not
        if ( $title->getNamespace() === NS_USER ) {
            return $title;
        }

        # Go to images that exist even if there's no local page.
        # There may have been a funny upload, or it may be on a shared
        # file repository such as Wikimedia Commons.
        if ( $title->getNamespace() === NS_FILE ) {
            $image = $this->repoGroup->findFile( $title );
            if ( $image ) {
                return $title;
            }
        }

        # MediaWiki namespace? Page may be "implied" if not customized.
        # Just return it, with caps forced as the message system likes it.
        if ( $title->getNamespace() === NS_MEDIAWIKI ) {
            return $this->titleFactory->makeTitle(
                NS_MEDIAWIKI,
                $this->language->ucfirst( $title->getText() )
            );
        }

        # Quoted term? Try without the quotes...
        # This goes through the public entry point, so the completion hook
        # also runs for the unquoted term.
        $matches = [];
        if ( preg_match( '/^"([^"]+)"$/', $searchterm, $matches ) ) {
            return $this->getNearMatch( $matches[1] );
        }

        return null;
    }

    /**
     * Return the first existing title among the case variations of a term.
     *
     * @param string $term
     * @return Title|null Null when no variation names an existing title
     */
    private function findExistingCaseVariant( $term ): ?Title {
        foreach ( $this->caseVariantCandidates( $term ) as $candidate ) {
            $title = $this->titleFactory->newFromText( $candidate );
            if ( $title && $title->exists() ) {
                return $title;
            }
        }

        return null;
    }

    /**
     * Lazily produce the title texts to try for a term, in priority order.
     *
     * Being a generator, each transformation is only computed if every
     * earlier candidate failed to match.
     *
     * @param string $term
     * @return \Generator Yields strings
     */
    private function caseVariantCandidates( $term ): \Generator {
        # All lower case (=> first letter capitalized on some wikis)
        yield $this->language->lc( $term );

        # The NFKC-normalized forms are only worth trying if normalizing
        # actually changed the term
        $normTerm = Validator::toNFKC( $term );
        $normDiff = $normTerm !== $term;
        if ( $normDiff ) {
            yield $this->language->lc( $normTerm );
        }

        # Capitalized string
        yield $this->language->ucwords( $term );

        # Normalized capitalized
        if ( $normDiff ) {
            yield $this->language->ucwords( $normTerm );
        }

        # All upper case
        yield $this->language->uc( $term );

        # Word-Caps-Breaking-At-Word-Breaks, for hyphenated names etc
        yield $this->language->ucwordbreaks( $term );
    }
}