35 private $latestPage =
null;
53 $searchEngineConfig = $services->getSearchEngineConfig();
56 LoggerFactory::getInstance(
"search" )
57 ->debug(
"Skipping update: search updates disabled by config" );
61 $seFactory = $services->getSearchEngineFactory();
62 foreach ( $searchEngineConfig->getSearchTypes() as $type ) {
63 $search = $seFactory->create( $type );
64 if ( !$search->supports(
'search-update' ) ) {
68 $normalTitle = $this->getNormalizedTitle( $search );
70 if ( $this->getLatestPage() ===
null ) {
71 $search->delete( $this->
id, $normalTitle );
74 if ( $this->content ===
null ) {
75 $search->updateTitle( $this->
id, $normalTitle );
79 $text = $this->content->getTextForSearchIndex();
82 # Perform the actual update
83 $search->update( $this->
id, $normalTitle, $search->normalizeText( $text ) );
97 $contLang = $services->getContentLanguage();
98 # Language-specific strip/conversion
99 $text = $contLang->normalizeForSearch( $text );
100 $se = $se ?: $services->newSearchEngine();
101 $lc = $se->legalSearchChars() .
'&#;';
104 $text = preg_replace(
"/<\\/?\\s*[A-Za-z][^>]*?>/",
105 ' ', $contLang->lc(
" " . $text .
" " ) );
106 $text = preg_replace(
"/(^|\\n)==\\s*([^\\n]+)\\s*==(\\s)/",
107 "\\1\\2 \\2 \\2\\3", $text ); # Emphasize headings
109 # Strip external URLs
110 $uc =
"A-Za-z0-9_\\/:.,~%\\-+&;#?!=()@\\x80-\\xFF";
111 $protos =
"http|https|ftp|mailto|news|gopher";
112 $pat =
"/(^|[^\\[])({$protos}):[{$uc}]+([^{$uc}]|$)/";
113 $text = preg_replace( $pat,
"\\1 \\3", $text );
115 $p1 =
"/([^\\[])\\[({$protos}):[{$uc}]+]/";
116 $p2 =
"/([^\\[])\\[({$protos}):[{$uc}]+\\s+([^\\]]+)]/";
117 $text = preg_replace( $p1,
"\\1 ", $text );
118 $text = preg_replace( $p2,
"\\1 \\3 ", $text );
120 $text = preg_replace(
"/([^{$lc}])([{$lc}]+)]]([a-z]+)/",
121 "\\1\\2 \\2\\3", $text ); # Handle [[game]]s
123 # Strip all remaining non-search characters
124 $text = preg_replace(
"/[^{$lc}]+/",
" ", $text );
142 $text = strrev( preg_replace(
"/ s'([{$lc}]+)/",
" s'\\1 \\1", strrev( $text ) ) );
143 $text = strrev( preg_replace(
"/ 's([{$lc}]+)/",
" s\\1", strrev( $text ) ) );
145 # Strip wiki '' and '''
146 $text = preg_replace(
"/''[']*/",
" ", $text );
160 private function getLatestPage() {
161 if ( !$this->latestPage ) {
163 ->getPageById( $this->
id, IDBAccessObject::READ_LATEST );
166 return $this->latestPage;
176 private function getNormalizedTitle( SearchEngine $search ) {
178 $title = Title::newFromPageIdentity( $this->page )->getText();
180 $lc = $search->legalSearchChars() .
'&#;';
181 $t = $contLang->normalizeForSearch( $title );
182 $t = preg_replace(
"/[^{$lc}]+/",
' ', $t );
183 $t = $contLang->lc( $t );
185 if ( $this->page->getNamespace() ===
NS_FILE ) {
186 $t = preg_replace(
"/([{$lc}]+)\\.(\\w{1,4})$/",
"\\1 \\1.\\2", $t );
190 $t = preg_replace(
"/([{$lc}]+)'s( |$)/",
"\\1 \\1's ", $t );
191 $t = preg_replace(
"/([{$lc}]+)s'( |$)/",
"\\1s ", $t );
193 $t = preg_replace(
"/\\s+/",
' ', $t );
195 return $search->normalizeText( trim( $t ) );
A class containing constants representing the names of configuration variables.
const DisableSearchUpdate
Name constant for the DisableSearchUpdate setting, for use with Config::get().
Content objects represent page content, e.g. the text to show on a page.
Interface for objects (potentially) representing an editable wiki page.