59 if ( is_string( $c ) ) {
60 wfDeprecated( __METHOD__ .
" with a string for the content",
'1.34' );
62 } elseif ( is_bool( $c ) ) {
63 wfDeprecated( __METHOD__ .
" with a boolean for the content",
'1.34' );
73 $services = MediaWikiServices::getInstance();
74 $config = $services->getSearchEngineConfig();
76 if ( $config->getConfig()->get(
'DisableSearchUpdate' ) || !$this->id ) {
80 $seFactory = $services->getSearchEngineFactory();
81 foreach ( $config->getSearchTypes() as
$type ) {
82 $search = $seFactory->create(
$type );
83 if ( !$search->supports(
'search-update' ) ) {
90 $search->delete( $this->
id, $normalTitle );
92 } elseif ( $this->content ===
null ) {
93 $search->updateTitle( $this->
id, $normalTitle );
97 $text = $this->content !==
null ? $this->content->getTextForSearchIndex() :
'';
100 # Perform the actual update
101 $search->update( $this->
id, $normalTitle, $search->normalizeText( $text ) );
114 $services = MediaWikiServices::getInstance();
115 $contLang = $services->getContentLanguage();
116 # Language-specific strip/conversion
117 $text = $contLang->normalizeForSearch( $text );
118 $se = $se ?: $services->newSearchEngine();
119 $lc = $se->legalSearchChars() .
'&#;';
122 $text = preg_replace(
"/<\\/?\\s*[A-Za-z][^>]*?>/",
123 ' ', $contLang->lc(
" " . $text .
" " ) );
124 $text = preg_replace(
"/(^|\\n)==\\s*([^\\n]+)\\s*==(\\s)/sD",
125 "\\1\\2 \\2 \\2\\3", $text ); # Emphasize headings
127 # Strip external URLs
128 $uc =
"A-Za-z0-9_\\/:.,~%\\-+&;#?!=()@\\x80-\\xFF";
129 $protos =
"http|https|ftp|mailto|news|gopher";
130 $pat =
"/(^|[^\\[])({$protos}):[{$uc}]+([^{$uc}]|$)/";
131 $text = preg_replace( $pat,
"\\1 \\3", $text );
133 $p1 =
"/([^\\[])\\[({$protos}):[{$uc}]+]/";
134 $p2 =
"/([^\\[])\\[({$protos}):[{$uc}]+\\s+([^\\]]+)]/";
135 $text = preg_replace( $p1,
"\\1 ", $text );
136 $text = preg_replace( $p2,
"\\1 \\3 ", $text );
138 # Internal image links
139 $pat2 =
"/\\[\\[image:([{$uc}]+)\\.(gif|png|jpg|jpeg)([^{$uc}])/i";
140 $text = preg_replace( $pat2,
" \\1 \\3", $text );
142 $text = preg_replace(
"/([^{$lc}])([{$lc}]+)]]([a-z]+)/",
143 "\\1\\2 \\2\\3", $text ); # Handle [[game]]s
145 # Strip all remaining non-search characters
146 $text = preg_replace(
"/[^{$lc}]+/",
" ", $text );
164 $text = strrev( preg_replace(
"/ s'([{$lc}]+)/",
" s'\\1 \\1", strrev( $text ) ) );
165 $text = strrev( preg_replace(
"/ 's([{$lc}]+)/",
" s\\1", strrev( $text ) ) );
167 # Strip wiki '' and '''
168 $text = preg_replace(
"/''[']*/",
" ", $text );
183 if ( !isset( $this->latestPage ) ) {
184 $this->latestPage = MediaWikiServices::getInstance()->getPageStore()
185 ->getPageById( $this->
id, PageStore::READ_LATEST );
188 return $this->latestPage;
199 $contLang = MediaWikiServices::getInstance()->getContentLanguage();
200 $ns = $this->page->getNamespace();
201 $title = str_replace(
'_',
' ', $this->page->getDBkey() );
204 $t = $contLang->normalizeForSearch(
$title );
205 $t = preg_replace(
"/[^{$lc}]+/",
' ',
$t );
206 $t = $contLang->lc(
$t );
209 $t = preg_replace(
"/([{$lc}]+)'s( |$)/",
"\\1 \\1's ",
$t );
210 $t = preg_replace(
"/([{$lc}]+)s'( |$)/",
"\\1s ",
$t );
212 $t = preg_replace(
"/\\s+/",
' ',
$t );
215 $t = preg_replace(
"/ (png|gif|jpg|jpeg|ogg)$/",
"",
$t );
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
Contains a class for special pages.
normalizeText( $string)
When overridden in a derived class, performs database-specific conversions on text to be used for searching or updating the search index.
legalSearchChars( $type=self::CHARS_ALL)
Get chars legal for search.
Database independent search index updater.
ExistingPageRecord null $latestPage
PageIdentity $page
The page we're updating.
getLatestPage()
Get ExistingPageRecord for the SearchUpdate $id using PageStore::READ_LATEST and ensure the same ExistingPageRecord object is reused when there are multiple SearchEngine types.
updateText( $text, SearchEngine $se=null)
Clean text for indexing.
getNormalizedTitle(SearchEngine $search)
Get a normalized string representation of a title suitable for including in a search index.
doUpdate()
Perform actual update for the entry.
Content null $content
Content of the page (not text)
__construct( $id, $page, $c=null)
int $id
Page id being updated.
Content object implementation for representing flat text.
Base interface for content objects.
Interface that deferrable updates should implement.
Interface for objects (potentially) representing an editable wiki page.