54 if ( is_string( $title ) ) {
55 $nt = Title::newFromText( $title );
63 if ( is_string( $c ) ) {
66 $this->content = $c ?:
false;
70 wfDebug(
"SearchUpdate object created with invalid title '$title'\n" );
78 $services = MediaWikiServices::getInstance();
79 $config =
$services->getSearchEngineConfig();
81 if ( $config->getConfig()->get(
'DisableSearchUpdate' ) || !$this->id ) {
85 $seFactory =
$services->getSearchEngineFactory();
86 foreach ( $config->getSearchTypes() as
$type ) {
87 $search = $seFactory->create(
$type );
88 if ( !$search->supports(
'search-update' ) ) {
95 $search->delete( $this->
id, $normalTitle );
97 } elseif ( $this->content ===
false ) {
98 $search->updateTitle( $this->
id, $normalTitle );
102 $text = $search->getTextFromContent( $this->title, $this->content );
103 if ( !$search->textAlreadyUpdatedForIndex() ) {
107 # Perform the actual update
108 $search->update( $this->
id, $normalTitle, $search->normalizeText( $text ) );
121 $services = MediaWikiServices::getInstance();
122 $contLang =
$services->getContentLanguage();
123 # Language-specific strip/conversion
124 $text = $contLang->normalizeForSearch( $text );
125 $se = $se ?:
$services->newSearchEngine();
126 $lc = $se->legalSearchChars() .
'&#;';
129 $text = preg_replace(
"/<\\/?\\s*[A-Za-z][^>]*?>/",
130 ' ', $contLang->lc(
" " . $text .
" " ) );
131 $text = preg_replace(
"/(^|\\n)==\\s*([^\\n]+)\\s*==(\\s)/sD",
132 "\\1\\2 \\2 \\2\\3", $text ); # Emphasize headings
134 # Strip external URLs
135 $uc =
"A-Za-z0-9_\\/:.,~%\\-+&;#?!=()@\\x80-\\xFF";
136 $protos =
"http|https|ftp|mailto|news|gopher";
137 $pat =
"/(^|[^\\[])({$protos}):[{$uc}]+([^{$uc}]|$)/";
138 $text = preg_replace( $pat,
"\\1 \\3", $text );
140 $p1 =
"/([^\\[])\\[({$protos}):[{$uc}]+]/";
141 $p2 =
"/([^\\[])\\[({$protos}):[{$uc}]+\\s+([^\\]]+)]/";
142 $text = preg_replace( $p1,
"\\1 ", $text );
143 $text = preg_replace( $p2,
"\\1 \\3 ", $text );
145 # Internal image links
146 $pat2 =
"/\\[\\[image:([{$uc}]+)\\.(gif|png|jpg|jpeg)([^{$uc}])/i";
147 $text = preg_replace( $pat2,
" \\1 \\3", $text );
149 $text = preg_replace(
"/([^{$lc}])([{$lc}]+)]]([a-z]+)/",
150 "\\1\\2 \\2\\3", $text ); # Handle [[game]]s
152 # Strip all remaining non-search characters
153 $text = preg_replace(
"/[^{$lc}]+/",
" ", $text );
171 $text = strrev( preg_replace(
"/ s'([{$lc}]+)/",
" s'\\1 \\1", strrev( $text ) ) );
172 $text = strrev( preg_replace(
"/ 's([{$lc}]+)/",
" s\\1", strrev( $text ) ) );
174 # Strip wiki '' and '''
175 $text = preg_replace(
"/''[']*/",
" ", $text );
190 if ( !isset( $this->page ) ) {
191 $this->page = WikiPage::newFromID( $this->
id, WikiPage::READ_LATEST );
205 $contLang = MediaWikiServices::getInstance()->getContentLanguage();
206 $ns = $this->title->getNamespace();
207 $title = $this->title->getText();
210 $t = $contLang->normalizeForSearch( $title );
211 $t = preg_replace(
"/[^{$lc}]+/",
' ',
$t );
212 $t = $contLang->lc(
$t );
215 $t = preg_replace(
"/([{$lc}]+)'s( |$)/",
"\\1 \\1's ",
$t );
216 $t = preg_replace(
"/([{$lc}]+)s'( |$)/",
"\\1s ",
$t );
218 $t = preg_replace(
"/\\s+/",
' ',
$t );
221 $t = preg_replace(
"/ (png|gif|jpg|jpeg|ogg)$/",
"",
$t );
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Contain a class for special pages.
normalizeText( $string)
When overridden in derived class, performs database-specific conversions on text to be used for searc...
static legalSearchChars( $type=self::CHARS_ALL)
Get chars legal for search. NOTE: usage as static is deprecated and preserved only as a BC measure.
Database independent search index updater.
getLatestPage()
Get WikiPage for the SearchUpdate $id using WikiPage::READ_LATEST and ensure using the same WikiPage ...
updateText( $text, SearchEngine $se=null)
Clean text for indexing.
__construct( $id, $title, $c=false)
getNormalizedTitle(SearchEngine $search)
Get a normalized string representation of a title suitable for including in a search index.
Content bool $content
Content of the page (not text)
doUpdate()
Perform actual update for the entry.
Title $title
Title we're updating.
int $id
Page id being updated.
Content object implementation for representing flat text.
Represents a title within MediaWiki.
Class representing a MediaWiki article and history.
namespace and then decline to actually register it file or subcat img or subcat $title
static configuration should be added through ResourceLoaderGetConfigVars instead can be used to get the real title e g db for database replication lag or jobqueue for job queue size converted to pseudo seconds It is possible to add more fields and they will be returned to the user in the API response after the basic globals have been set but before ordinary actions take place or wrap services the preferred way to define a new service is the $wgServiceWiringFiles array $services
Base interface for content objects.
Interface that deferrable updates should implement.