Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
75.68% |
56 / 74 |
|
55.56% |
5 / 9 |
CRAP | |
0.00% |
0 / 1 |
ExperimentalHighlightedFieldBuilder | |
75.68% |
56 / 74 |
|
55.56% |
5 / 9 |
18.24 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
entireValue | |
71.43% |
5 / 7 |
|
0.00% |
0 / 1 |
1.02 | |||
redirectAndHeadings | |
77.78% |
7 / 9 |
|
0.00% |
0 / 1 |
1.01 | |||
text | |
100.00% |
19 / 19 |
|
100.00% |
1 / 1 |
1 | |||
mainText | |
60.00% |
3 / 5 |
|
0.00% |
0 / 1 |
1.06 | |||
newRegexField | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
2 | |||
merge | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
6 | |||
skipIfLastMatched | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getFactories | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Search\Fetch; |
4 | |
5 | use CirrusSearch\Search\SearchQuery; |
6 | use CirrusSearch\SearchConfig; |
7 | |
/**
 * Highlighted-field definition tagged with the 'experimental' highlighter type.
 *
 * Besides the constructor, the class exposes static factory methods. Most of them
 * return a callable with the signature
 * ( SearchConfig $config, $fieldName, $target, $priority ) that builds a
 * pre-configured instance; getFactories() maps field names to those callables.
 */
class ExperimentalHighlightedFieldBuilder extends BaseHighlightedField {
	public const EXPERIMENTAL_HL_TYPE = 'experimental';

	/**
	 * @param string $fieldName
	 * @param string $target
	 * @param int $priority
	 */
	public function __construct( $fieldName, $target, $priority = self::DEFAULT_TARGET_PRIORITY ) {
		parent::__construct( $fieldName, self::EXPERIMENTAL_HL_TYPE, $target, $priority );
	}

	/**
	 * Factory for fields configured with the 'none' fragmenter and a single fragment.
	 *
	 * @return callable ( SearchConfig $config, $fieldName, $target, $priority ): self
	 */
	public static function entireValue(): callable {
		return static function ( SearchConfig $config, $fieldName, $target, $priority ) {
			$self = new self( $fieldName, $target, $priority );
			$self->matchPlainFields();
			$self->setFragmenter( 'none' );
			$self->setNumberOfFragments( 1 );
			return $self;
		};
	}

	/**
	 * Factory for fields configured like entireValue(), plus the
	 * 'skip_if_last_matched' option and 'score' ordering.
	 *
	 * @return callable ( SearchConfig $config, $fieldName, $target, $priority ): self
	 */
	public static function redirectAndHeadings(): callable {
		return static function ( SearchConfig $config, $fieldName, $target, $priority ) {
			$self = new self( $fieldName, $target, $priority );
			$self->matchPlainFields();
			$self->skipIfLastMatched();
			$self->setFragmenter( 'none' );
			$self->setOrder( 'score' );
			$self->setNumberOfFragments( 1 );
			return $self;
		};
	}

	/**
	 * Factory for fields using the 'scan' fragmenter, a single fragment sized by
	 * the CirrusSearchFragmentSize setting, and the scoring options set below.
	 *
	 * @return callable ( SearchConfig $config, $fieldName, $target, $priority ): self
	 */
	public static function text(): callable {
		return static function ( SearchConfig $config, $fieldName, $target, $priority ) {
			$self = new self( $fieldName, $target, $priority );
			$self->matchPlainFields();
			$self->skipIfLastMatched();
			$self->setFragmenter( 'scan' );
			$self->setNumberOfFragments( 1 );
			$self->setFragmentSize( $config->get( 'CirrusSearchFragmentSize' ) );
			// NOTE(review): if setOptions() replaces the option array instead of merging
			// into it, the 'skip_if_last_matched' flag set above is discarded here.
			// Confirm against BaseHighlightedField before relying on that flag.
			$self->setOptions( [
				'top_scoring' => true,
				'boost_before' => [
					// Note these values are super arbitrary right now.
					'20' => 2,
					'50' => 1.8,
					'200' => 1.5,
					'1000' => 1.2,
				],
				// We should set a limit on the number of fragments we try because if we
				// don't then we'll hit really crazy documents, say 10MB of "d d". This'll
				// keep us from scanning more than the first couple thousand of them.
				// Setting this too low (like 50) can bury good snippets if the search
				// contains common words.
				'max_fragments_scored' => 5000,
			] );
			return $self;
		};
	}

	/**
	 * Factory that builds a field through text() and additionally sets its
	 * no-match size to the CirrusSearchFragmentSize setting.
	 *
	 * @return callable ( SearchConfig $config, $fieldName, $target, $priority ): BaseHighlightedField
	 */
	protected static function mainText(): callable {
		// Static like the sibling factories: the closure never touches $this.
		return static function ( SearchConfig $config, $fieldName, $target, $priority ) {
			$self = ( self::text() )( $config, $fieldName, $target, $priority );
			/** @var BaseHighlightedField $self */
			$self->setNoMatchSize( $config->get( 'CirrusSearchFragmentSize' ) );
			return $self;
		};
	}

	/**
	 * Build a field through text() and add the regex-related options to it.
	 *
	 * When $name is exactly 'source_text.plain' the no-match size is also set to
	 * the CirrusSearchFragmentSize setting.
	 *
	 * @param SearchConfig $config
	 * @param string $name
	 * @param string $target
	 * @param string $pattern
	 * @param bool $caseInsensitive
	 * @param int $priority
	 * @return self
	 */
	public static function newRegexField(
		SearchConfig $config,
		$name,
		$target,
		$pattern,
		$caseInsensitive,
		$priority
	): self {
		// TODO: verify that we actually need to have all the text() options when running a regex
		/** @var self $self */
		$self = ( self::text() )( $config, $name, $target, $priority );
		// Stored as a list so merge() can append the patterns of another regex field.
		$self->addOption( 'regex', [ $pattern ] );
		$self->addOption( 'locale', $config->get( 'LanguageCode' ) );
		$self->addOption( 'regex_flavor', 'lucene' );
		$self->addOption( 'skip_query', true );
		$self->addOption( 'regex_case_insensitive', $caseInsensitive );
		$self->addOption( 'max_determinized_states', $config->get( 'CirrusSearchRegexMaxDeterminizedStates' ) );

		// Strict comparison: $name is documented as a string, no type juggling wanted.
		if ( $name === 'source_text.plain' ) {
			$self->setNoMatchSize( $config->get( 'CirrusSearchFragmentSize' ) );
		}
		return $self;
	}

	/**
	 * Two regex fields of this class on the same field name are combined in place:
	 * the pattern lists are concatenated and the result is case insensitive as soon
	 * as either side is. Every other combination is delegated to the parent.
	 *
	 * @inheritDoc
	 */
	public function merge( HighlightedField $other ): HighlightedField {
		if ( isset( $this->options['regex'] ) &&
			$other instanceof ExperimentalHighlightedFieldBuilder &&
			isset( $other->options['regex'] ) &&
			$this->getFieldName() === $other->getFieldName()
		) {
			$this->options['regex'] = array_merge( $this->options['regex'], $other->options['regex'] );
			$mergedInsensitivity = $this->options['regex_case_insensitive'] || $other->options['regex_case_insensitive'];
			$this->options['regex_case_insensitive'] = $mergedInsensitivity;
			return $this;
		} else {
			return parent::merge( $other );
		}
	}

	/**
	 * Set the 'skip_if_last_matched' option to true on this field.
	 *
	 * @return ExperimentalHighlightedFieldBuilder $this, for chaining
	 */
	public function skipIfLastMatched(): BaseHighlightedField {
		$this->addOption( 'skip_if_last_matched', true );
		return $this;
	}

	/**
	 * Factory callables keyed by query type and then by field name.
	 *
	 * @return array [ SearchQuery::SEARCH_TEXT => [ field name => callable ] ]
	 */
	public static function getFactories() {
		return [
			SearchQuery::SEARCH_TEXT => [
				'title' => self::entireValue(),
				'redirect.title' => self::redirectAndHeadings(),
				'category' => self::redirectAndHeadings(),
				'heading' => self::redirectAndHeadings(),
				'text' => self::mainText(),
				'source_text.plain' => self::mainText(),
				'auxiliary_text' => self::text(),
				'file_text' => self::text(),
			]
		];
	}
}