MediaWiki  1.23.8
SearchUpdate.php
Go to the documentation of this file.
1 <?php
/**
 * Deferrable update that refreshes the search index entry of a single page.
 *
 * When run, every search type reported by SearchEngine::getSearchTypes() that
 * supports 'search-update' is told to delete the entry, update only its
 * title, or update title and text, depending on whether the page still exists
 * and whether content was supplied.
 */
class SearchUpdate implements DeferrableUpdate {
	/** @var int Page id to update; stays 0 when the supplied title was invalid */
	private $id = 0;

	/** @var Title|null Title of the page; only assigned when the title was valid */
	private $title;

	/** @var Content|bool Content to index, or false to update the title only */
	private $content;

	/**
	 * @param int $id Page id to update
	 * @param Title|string $title Title object, or title text that is resolved
	 *  with Title::newFromText()
	 * @param Content|string|bool $c Content to index. A plain string is wrapped
	 *  in a TextContent; any falsy value is stored as false, which makes
	 *  doUpdate() refresh the title only.
	 */
	public function __construct( $id, $title, $c = false ) {
		if ( is_string( $title ) ) {
			$nt = Title::newFromText( $title );
		} else {
			$nt = $title;
		}

		if ( $nt ) {
			$this->id = $id;
			// is_string() check is back-compat for ApprovedRevs
			if ( is_string( $c ) ) {
				$this->content = new TextContent( $c );
			} else {
				$this->content = $c ?: false;
			}
			$this->title = $nt;
		} else {
			// $id is left at 0, so doUpdate() becomes a no-op for this object.
			wfDebug( "SearchUpdate object created with invalid title '$title'\n" );
		}
	}

	/**
	 * Perform the actual search index update.
	 *
	 * Does nothing when $wgDisableSearchUpdate is set or no valid page id was
	 * stored by the constructor.
	 */
	public function doUpdate() {
		global $wgDisableSearchUpdate;

		if ( $wgDisableSearchUpdate || !$this->id ) {
			return;
		}

		wfProfileIn( __METHOD__ );

		$page = WikiPage::newFromId( $this->id, WikiPage::READ_LATEST );
		$indexTitle = Title::indexTitle( $this->title->getNamespace(), $this->title->getText() );

		foreach ( SearchEngine::getSearchTypes() as $type ) {
			$search = SearchEngine::create( $type );
			if ( !$search->supports( 'search-update' ) ) {
				continue;
			}

			// Normalisation is done by each engine, so it cannot be hoisted
			// out of the loop.
			$normalTitle = $search->normalizeText( $indexTitle );

			if ( $page === null ) {
				// No page could be loaded for this id: drop its index entry.
				$search->delete( $this->id, $normalTitle );
				continue;
			} elseif ( $this->content === false ) {
				// No content was supplied: only the title is refreshed.
				$search->updateTitle( $this->id, $normalTitle );
				continue;
			}

			$text = $search->getTextFromContent( $this->title, $this->content );
			if ( !$search->textAlreadyUpdatedForIndex() ) {
				$text = self::updateText( $text );
			}

			# Perform the actual update
			$search->update( $this->id, $normalTitle, $search->normalizeText( $text ) );
		}

		wfProfileOut( __METHOD__ );
	}

	/**
	 * Clean text for indexing: apply the content language's search
	 * normalisation, lower-case it, strip HTML-like tags, external URLs,
	 * image link syntax and every character that is not a legal search
	 * character, and duplicate heading and possessive words.
	 *
	 * @param string $text Text to clean
	 * @return string Cleaned text
	 */
	public static function updateText( $text ) {
		// Without this declaration $wgContLang would be an undefined local
		// and the method calls on it below would fail.
		global $wgContLang;

		# Language-specific strip/conversion
		$text = $wgContLang->normalizeForSearch( $text );
		$lc = SearchEngine::legalSearchChars() . '&#;';

		wfProfileIn( __METHOD__ . '-regexps' );
		$text = preg_replace( "/<\\/?\\s*[A-Za-z][^>]*?>/",
			' ', $wgContLang->lc( " " . $text . " " ) ); # Strip HTML markup
		$text = preg_replace( "/(^|\\n)==\\s*([^\\n]+)\\s*==(\\s)/sD",
			"\\1\\2 \\2 \\2\\3", $text ); # Emphasize headings

		# Strip external URLs
		$uc = "A-Za-z0-9_\\/:.,~%\\-+&;#?!=()@\\x80-\\xFF";
		$protos = "http|https|ftp|mailto|news|gopher";
		$pat = "/(^|[^\\[])({$protos}):[{$uc}]+([^{$uc}]|$)/";
		$text = preg_replace( $pat, "\\1 \\3", $text );

		# Bracketed external links: bare ($p1) and with a label ($p2, label kept)
		$p1 = "/([^\\[])\\[({$protos}):[{$uc}]+]/";
		$p2 = "/([^\\[])\\[({$protos}):[{$uc}]+\\s+([^\\]]+)]/";
		$text = preg_replace( $p1, "\\1 ", $text );
		$text = preg_replace( $p2, "\\1 \\3 ", $text );

		# Internal image links
		$pat2 = "/\\[\\[image:([{$uc}]+)\\.(gif|png|jpg|jpeg)([^{$uc}])/i";
		$text = preg_replace( $pat2, " \\1 \\3", $text );

		$text = preg_replace( "/([^{$lc}])([{$lc}]+)]]([a-z]+)/",
			"\\1\\2 \\2\\3", $text ); # Handle [[game]]s

		# Strip all remaining non-search characters
		$text = preg_replace( "/[^{$lc}]+/", " ", $text );

		# Handle 's, s'
		#
		#   $text = preg_replace( "/([{$lc}]+)'s /", "\\1 \\1's ", $text );
		#   $text = preg_replace( "/([{$lc}]+)s' /", "\\1s ", $text );
		#
		# These tail-anchored regexps are insanely slow. The worst case comes
		# when Japanese or Chinese text (ie, no word spacing) is written on
		# a wiki configured for Western UTF-8 mode. The Unicode characters are
		# expanded to hex codes and the "words" are very long paragraph-length
		# monstrosities. On a large page the above regexps may take over 20
		# seconds *each* on a 1GHz-level processor.
		#
		# Following are reversed versions which are consistently fast
		# (about 3 milliseconds on 1GHz-level processor).
		#
		$text = strrev( preg_replace( "/ s'([{$lc}]+)/", " s'\\1 \\1", strrev( $text ) ) );
		$text = strrev( preg_replace( "/ 's([{$lc}]+)/", " s\\1", strrev( $text ) ) );

		# Strip wiki '' and '''
		$text = preg_replace( "/''[']*/", " ", $text );
		wfProfileOut( __METHOD__ . '-regexps' );

		return $text;
	}
}
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:189
php
skin txt MediaWiki includes four core it has been set as the default in MediaWiki since the replacing Monobook it had been been the default skin since before being replaced by Vector largely rewritten in while keeping its appearance Several legacy skins were removed in the as the burden of supporting them became too heavy to bear Those in etc for skin dependent CSS etc for skin dependent JavaScript These can also be customised on a per user by etc This feature has led to a wide variety of user styles becoming that gallery is a good place to ending in php
Definition: skin.txt:62
content
per default it will return the text for text based content
Definition: contenthandler.txt:107
wfProfileIn
wfProfileIn( $functionname)
Begin profiling of a function.
Definition: Profiler.php:33
DeferrableUpdate\doUpdate
doUpdate()
Perform the actual work.
$wgContLang
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the content language as $wgContLang
Definition: design.txt:56
IDBAccessObject\READ_LATEST
const READ_LATEST
Definition: IDBAccessObject.php:49
title
to move a page</td >< td > &*You are moving the page across *A non empty talk page already exists under the new or *You uncheck the box below In those you will have to move or merge the page manually if desired</td >< td > be sure to &You are responsible for making sure that links continue to point where they are supposed to go Note that the page will &a page at the new title
Definition: All_system_messages.txt:2703
wfProfileOut
wfProfileOut( $functionname='missing')
Stop profiling of a function.
Definition: Profiler.php:46
global
when a variable name is used in a function, it is silently declared as a new local, masking the global
Definition: design.txt:93
wfDebug
wfDebug( $text, $dest='all')
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:933
$title
presenting them properly to the user as errors is done by the caller $title
Definition: hooks.txt:1324
SearchEngine\legalSearchChars
static legalSearchChars()
Definition: SearchEngine.php:256
TextContent
Content object implementation for representing flat text.
Definition: TextContent.php:35
Content
Base interface for content objects.
Definition: Content.php:34
Title
Represents a title within MediaWiki.
Definition: Title.php:35
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It's targeted particularly at maintainers for Linux since it's been observed that distribution packages of MediaWiki often break We've consistently had to recommend that users seeking support use official tarballs instead of their distribution's and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
DeferrableUpdate
Interface that deferrable updates should implement.
Definition: DeferredUpdates.php:29
Title\indexTitle
static indexTitle( $ns, $title)
Get a string representation of a title suitable for including in a search index.
Definition: Title.php:673
SearchEngine\create
static create( $type=null)
Load up the appropriate search engine class for the currently active database backend,...
Definition: SearchEngine.php:447
SearchEngine\getSearchTypes
static getSearchTypes()
Return the search engines we support.
Definition: SearchEngine.php:472
$type
$type
Definition: testCompression.php:46