54 if ( is_string( $title ) ) {
55 $nt = Title::newFromText( $title );
63 if ( is_string( $c ) ) {
66 $this->content = $c ?:
false;
70 wfDebug(
"SearchUpdate object created with invalid title '$title'\n" );
78 $config = MediaWikiServices::getInstance()->getSearchEngineConfig();
80 if ( $config->getConfig()->get(
'DisableSearchUpdate' ) || !$this->id ) {
84 $seFactory = MediaWikiServices::getInstance()->getSearchEngineFactory();
85 foreach ( $config->getSearchTypes() as
$type ) {
86 $search = $seFactory->create(
$type );
87 if ( !$search->supports(
'search-update' ) ) {
94 $search->delete( $this->
id, $normalTitle );
96 } elseif ( $this->content ===
false ) {
97 $search->updateTitle( $this->
id, $normalTitle );
101 $text = $search->getTextFromContent( $this->title, $this->content );
102 if ( !$search->textAlreadyUpdatedForIndex() ) {
106 # Perform the actual update
107 $search->update( $this->
id, $normalTitle, $search->normalizeText( $text ) );
122 # Language-specific strip/conversion
124 $se = $se ?: MediaWikiServices::getInstance()->newSearchEngine();
125 $lc = $se->legalSearchChars() .
'&#;';
127 $text = preg_replace(
"/<\\/?\\s*[A-Za-z][^>]*?>/",
128 ' ',
$wgContLang->lc(
" " . $text .
" " ) ); # Strip HTML markup
129 $text = preg_replace(
"/(^|\\n)==\\s*([^\\n]+)\\s*==(\\s)/sD",
130 "\\1\\2 \\2 \\2\\3", $text ); # Emphasize headings
132 # Strip external URLs
133 $uc =
"A-Za-z0-9_\\/:.,~%\\-+&;#?!=()@\\x80-\\xFF";
134 $protos =
"http|https|ftp|mailto|news|gopher";
135 $pat =
"/(^|[^\\[])({$protos}):[{$uc}]+([^{$uc}]|$)/";
136 $text = preg_replace( $pat,
"\\1 \\3", $text );
138 $p1 =
"/([^\\[])\\[({$protos}):[{$uc}]+]/";
139 $p2 =
"/([^\\[])\\[({$protos}):[{$uc}]+\\s+([^\\]]+)]/";
140 $text = preg_replace( $p1,
"\\1 ", $text );
141 $text = preg_replace( $p2,
"\\1 \\3 ", $text );
143 # Internal image links
144 $pat2 =
"/\\[\\[image:([{$uc}]+)\\.(gif|png|jpg|jpeg)([^{$uc}])/i";
145 $text = preg_replace( $pat2,
" \\1 \\3", $text );
147 $text = preg_replace(
"/([^{$lc}])([{$lc}]+)]]([a-z]+)/",
148 "\\1\\2 \\2\\3", $text ); # Handle [[game]]s
150 # Strip all remaining non-search characters
151 $text = preg_replace(
"/[^{$lc}]+/",
" ", $text );
169 $text = strrev( preg_replace(
"/ s'([{$lc}]+)/",
" s'\\1 \\1", strrev( $text ) ) );
170 $text = strrev( preg_replace(
"/ 's([{$lc}]+)/",
" s\\1", strrev( $text ) ) );
172 # Strip wiki '' and '''
173 $text = preg_replace(
"/''[']*/",
" ", $text );
188 if ( !isset( $this->page ) ) {
189 $this->page = WikiPage::newFromID( $this->
id, WikiPage::READ_LATEST );
205 $ns = $this->title->getNamespace();
206 $title = $this->title->getText();
210 $t = preg_replace(
"/[^{$lc}]+/",
' ',
$t );
214 $t = preg_replace(
"/([{$lc}]+)'s( |$)/",
"\\1 \\1's ",
$t );
215 $t = preg_replace(
"/([{$lc}]+)s'( |$)/",
"\\1s ",
$t );
217 $t = preg_replace(
"/\\s+/",
' ',
$t );
220 $t = preg_replace(
"/ (png|gif|jpg|jpeg|ogg)$/",
"",
$t );
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Contains a class for special pages.
normalizeText( $string)
When overridden in a derived class, performs database-specific conversions on text to be used for searching or updating the search index.
static legalSearchChars( $type=self::CHARS_ALL)
Get chars legal for search. NOTE: usage as static is deprecated and preserved only as a BC measure.
Database-independent search index updater.
getLatestPage()
Get WikiPage for the SearchUpdate $id using WikiPage::READ_LATEST, and ensure the same WikiPage object is used if there are multiple SearchEngine types.
updateText( $text, SearchEngine $se=null)
Clean text for indexing.
__construct( $id, $title, $c=false)
getNormalizedTitle(SearchEngine $search)
Get a normalized string representation of a title suitable for including in a search index.
Content|bool $content
Content of the page (not text)
doUpdate()
Perform actual update for the entry.
Title $title
Title we're updating.
int $id
Page id being updated.
Content object implementation for representing flat text.
Represents a title within MediaWiki.
Class representing a MediaWiki article and history.
... this class mediates it.
Skin: Encapsulates a "look and feel" for the wiki. All of the functions that render HTML, and make choices about how to render it, are here, and are called from various other places when needed. The StandardSkin object is a complete implementation, and is meant to be subclassed with other skins that may override some of its functions. The User object contains a reference to a skin (according to that user's preference), and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser->getSkin().
Language: Represents the language used for incidental text, and also has some character encoding functions and other locale stuff. The current user interface language is instantiated as $wgLang, and the local content language as $wgContLang.
Base interface for content objects.
Interface that deferrable updates should implement.