Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
75.68% covered (warning)
75.68%
56 / 74
55.56% covered (warning)
55.56%
5 / 9
CRAP
0.00% covered (danger)
0.00%
0 / 1
ExperimentalHighlightedFieldBuilder
75.68% covered (warning)
75.68%
56 / 74
55.56% covered (warning)
55.56%
5 / 9
18.24
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 entireValue
71.43% covered (warning)
71.43%
5 / 7
0.00% covered (danger)
0.00%
0 / 1
1.02
 redirectAndHeadings
77.78% covered (warning)
77.78%
7 / 9
0.00% covered (danger)
0.00%
0 / 1
1.01
 text
100.00% covered (success)
100.00%
19 / 19
100.00% covered (success)
100.00%
1 / 1
1
 mainText
60.00% covered (warning)
60.00%
3 / 5
0.00% covered (danger)
0.00%
0 / 1
1.06
 newRegexField
100.00% covered (success)
100.00%
10 / 10
100.00% covered (success)
100.00%
1 / 1
2
 merge
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
6
 skipIfLastMatched
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 getFactories
0.00% covered (danger)
0.00%
0 / 12
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2
3namespace CirrusSearch\Search\Fetch;
4
5use CirrusSearch\Search\SearchQuery;
6use CirrusSearch\SearchConfig;
7use MediaWiki\MainConfigNames;
8
/**
 * Highlighted field definition that passes self::EXPERIMENTAL_HL_TYPE
 * ('experimental') to the BaseHighlightedField constructor.
 *
 * Most static methods return a factory closure with the signature
 * ( SearchConfig $config, string $fieldName, string $target, int $priority )
 * that creates and configures an instance of this class; getFactories()
 * maps field names to those factories.
 */
class ExperimentalHighlightedFieldBuilder extends BaseHighlightedField {
    public const EXPERIMENTAL_HL_TYPE = 'experimental';

    /**
     * @param string $fieldName
     * @param string $target
     * @param int $priority
     */
    public function __construct( $fieldName, $target, $priority = self::DEFAULT_TARGET_PRIORITY ) {
        parent::__construct( $fieldName, self::EXPERIMENTAL_HL_TYPE, $target, $priority );
    }

    /**
     * Factory for a field configured with the 'none' fragmenter and a
     * single fragment.
     *
     * @return callable ( SearchConfig, string $fieldName, string $target, int $priority ): self
     */
    public static function entireValue(): callable {
        return static function ( SearchConfig $config, $fieldName, $target, $priority ) {
            $self = new self( $fieldName, $target, $priority );
            $self->matchPlainFields();
            $self->setFragmenter( 'none' );
            $self->setNumberOfFragments( 1 );
            return $self;
        };
    }

    /**
     * Factory for a field configured like entireValue(), with the
     * 'skip_if_last_matched' option enabled and the order set to 'score'.
     *
     * @return callable ( SearchConfig, string $fieldName, string $target, int $priority ): self
     */
    public static function redirectAndHeadings(): callable {
        return static function ( SearchConfig $config, $fieldName, $target, $priority ) {
            $self = new self( $fieldName, $target, $priority );
            $self->matchPlainFields();
            $self->addOption( 'skip_if_last_matched', true );
            $self->setFragmenter( 'none' );
            $self->setOrder( 'score' );
            $self->setNumberOfFragments( 1 );
            return $self;
        };
    }

    /**
     * Factory for a field using the 'scan' fragmenter, a single fragment
     * sized by the CirrusSearchFragmentSize setting, and the scoring options
     * listed below.
     *
     * @return callable ( SearchConfig, string $fieldName, string $target, int $priority ): self
     */
    public static function text(): callable {
        return static function ( SearchConfig $config, $fieldName, $target, $priority ) {
            $self = new self( $fieldName, $target, $priority );
            $self->matchPlainFields();
            $self->addOption( 'skip_if_last_matched', true );
            $self->setFragmenter( 'scan' );
            $self->setNumberOfFragments( 1 );
            $self->setFragmentSize( $config->get( 'CirrusSearchFragmentSize' ) );
            // NOTE(review): setOptions() runs after the addOption() call above. If it
            // replaces the whole option set rather than merging, 'skip_if_last_matched'
            // is dropped here - confirm against BaseHighlightedField.
            $self->setOptions( [
                'top_scoring' => true,
                'boost_before' => [
                    // Note these values are super arbitrary right now.
                    '20' => 2,
                    '50' => 1.8,
                    '200' => 1.5,
                    '1000' => 1.2,
                ],
                // We should set a limit on the number of fragments we try because if we
                // don't then we'll hit really crazy documents, say 10MB of "d d".  This'll
                // keep us from scanning more than the first couple thousand of them.
                // Setting this too low (like 50) can bury good snippets if the search
                // contains common words.
                'max_fragments_scored' => 5000,
            ] );
            return $self;
        };
    }

    /**
     * Factory that builds a field through text() and additionally sets its
     * no-match size to the CirrusSearchFragmentSize setting.
     *
     * @return callable ( SearchConfig, string $fieldName, string $target, int $priority ): self
     */
    protected static function mainText(): callable {
        // Declared static like the sibling factories: the closure never uses $this.
        return static function ( SearchConfig $config, $fieldName, $target, $priority ) {
            /** @var BaseHighlightedField $self */
            $self = ( self::text() )( $config, $fieldName, $target, $priority );
            $self->setNoMatchSize( $config->get( 'CirrusSearchFragmentSize' ) );
            return $self;
        };
    }

    /**
     * Build a regex highlighting field on top of the text() configuration.
     *
     * @param SearchConfig $config
     * @param string $name
     * @param string $target
     * @param string $pattern
     * @param bool $caseInsensitive
     * @param int $priority
     * @param string $regexFlavor
     * @return self
     */
    public static function newRegexField(
        SearchConfig $config,
        $name,
        $target,
        $pattern,
        $caseInsensitive,
        $priority,
        $regexFlavor = 'lucene'
    ): self {
        // TODO: verify that we actually need to have all the text() options when running a regex
        /** @var self $self */
        $self = ( self::text() )( $config, $name, $target, $priority );
        // Stored as a list so that merge() can append the patterns of another field.
        $self->addOption( 'regex', [ $pattern ] );
        $self->addOption( 'locale', $config->get( MainConfigNames::LanguageCode ) );
        $self->addOption( 'regex_flavor', $regexFlavor );
        $self->addOption( 'skip_query', true );
        $self->addOption( 'regex_case_insensitive', $caseInsensitive );
        $self->addOption( 'max_determinized_states', $config->get( 'CirrusSearchRegexMaxDeterminizedStates' ) );

        // Strict comparison: a non-string $name must never match through type juggling.
        if ( $name === 'source_text.plain' ) {
            $self->setNoMatchSize( $config->get( 'CirrusSearchFragmentSize' ) );
        }
        return $self;
    }

    /**
     * When both fields carry a 'regex' option and share the same field name,
     * the other field's patterns are appended to this one and the result is
     * case insensitive if either field was. Every other combination is
     * delegated to the parent implementation.
     *
     * @inheritDoc
     */
    public function merge( HighlightedField $other ): HighlightedField {
        if ( isset( $this->options['regex'] ) &&
             $other instanceof self &&
             isset( $other->options['regex'] ) &&
             $this->getFieldName() === $other->getFieldName()
        ) {
            $this->options['regex'] = array_merge( $this->options['regex'], $other->options['regex'] );
            $mergedInsensitivity = $this->options['regex_case_insensitive'] || $other->options['regex_case_insensitive'];
            $this->options['regex_case_insensitive'] = $mergedInsensitivity;
            return $this;
        } else {
            return parent::merge( $other );
        }
    }

    /**
     * Enable the 'skip_if_last_matched' option on this field.
     *
     * @return $this
     */
    public function skipIfLastMatched(): BaseHighlightedField {
        $this->addOption( 'skip_if_last_matched', true );
        return $this;
    }

    /**
     * Field factories, keyed first by SearchQuery::SEARCH_TEXT and then by
     * field name.
     *
     * @return array
     */
    public static function getFactories() {
        return [
            SearchQuery::SEARCH_TEXT => [
                'title' => self::entireValue(),
                'redirect.title' => self::redirectAndHeadings(),
                'category' => self::redirectAndHeadings(),
                'heading' => self::redirectAndHeadings(),
                'text' => self::mainText(),
                'source_text.plain' => self::mainText(),
                'auxiliary_text' => self::text(),
                'file_text' => self::text(),
            ]
        ];
    }
}