Code Coverage

| | Classes and Traits | Functions and Methods | CRAP | Lines |
|---|---|---|---|---|
| Total | 0.00% (0 / 1) | 55.56% (5 / 9) | | 81.36% (48 / 59) |
| ExperimentalHighlightedFieldBuilder | 0.00% (0 / 1) | 55.56% (5 / 9) | 16.46 | 81.36% (48 / 59) |
| __construct | | 100.00% (1 / 1) | 1 | 100.00% (2 / 2) |
| entireValue | | 0.00% (0 / 1) | 1.00 | 83.33% (5 / 6) |
| redirectAndHeadings | | 0.00% (0 / 1) | 1.00 | 87.50% (7 / 8) |
| text | | 100.00% (1 / 1) | 1 | 100.00% (10 / 10) |
| mainText | | 0.00% (0 / 1) | 1.02 | 75.00% (3 / 4) |
| newRegexField | | 100.00% (1 / 1) | 2 | 100.00% (10 / 10) |
| merge | | 100.00% (1 / 1) | 6 | 100.00% (9 / 9) |
| skipIfLastMatched | | 100.00% (1 / 1) | 1 | 100.00% (2 / 2) |
| getFactories | | 0.00% (0 / 1) | 2 | 0.00% (0 / 8) |

<?php | |
namespace CirrusSearch\Search\Fetch; | |
use CirrusSearch\Search\SearchQuery; | |
use CirrusSearch\SearchConfig; | |
/**
 * Highlighted field definition using the 'experimental' highlighter type.
 *
 * Besides the constructor, the class exposes static factory methods that
 * return closures with the signature
 * ( SearchConfig $config, $fieldName, $target, $priority ); each closure
 * builds a pre-configured instance of this class. getFactories() maps field
 * names to those closures.
 */
class ExperimentalHighlightedFieldBuilder extends BaseHighlightedField {
	/** Highlighter type passed to the parent constructor. */
	public const EXPERIMENTAL_HL_TYPE = 'experimental';

	/**
	 * @param string $fieldName
	 * @param string $target
	 * @param int $priority
	 */
	public function __construct( $fieldName, $target, $priority = self::DEFAULT_TARGET_PRIORITY ) {
		parent::__construct( $fieldName, self::EXPERIMENTAL_HL_TYPE, $target, $priority );
	}

	/**
	 * Factory for fields configured with the 'none' fragmenter and a single
	 * fragment, matching plain fields.
	 *
	 * @return callable
	 */
	public static function entireValue(): callable {
		return static function ( SearchConfig $config, $fieldName, $target, $priority ) {
			$self = new self( $fieldName, $target, $priority );
			$self->matchPlainFields();
			$self->setFragmenter( 'none' );
			$self->setNumberOfFragments( 1 );
			return $self;
		};
	}

	/**
	 * Factory for fields configured like entireValue() plus the
	 * 'skip_if_last_matched' option and 'score' ordering.
	 *
	 * @return callable
	 */
	public static function redirectAndHeadings(): callable {
		return static function ( SearchConfig $config, $fieldName, $target, $priority ) {
			$self = new self( $fieldName, $target, $priority );
			$self->matchPlainFields();
			$self->addOption( 'skip_if_last_matched', true );
			$self->setFragmenter( 'none' );
			$self->setOrder( 'score' );
			$self->setNumberOfFragments( 1 );
			return $self;
		};
	}

	/**
	 * Factory for text fields: 'scan' fragmenter, a single fragment sized by
	 * the CirrusSearchFragmentSize setting, and fragment scoring options.
	 *
	 * @return callable
	 */
	public static function text(): callable {
		return static function ( SearchConfig $config, $fieldName, $target, $priority ) {
			$self = new self( $fieldName, $target, $priority );
			$self->matchPlainFields();
			$self->addOption( 'skip_if_last_matched', true );
			$self->setFragmenter( 'scan' );
			$self->setNumberOfFragments( 1 );
			$self->setFragmentSize( $config->get( 'CirrusSearchFragmentSize' ) );
			// NOTE(review): setOptions() runs after the addOption() call above. If
			// the parent implementation replaces the option array instead of
			// merging into it, 'skip_if_last_matched' is discarded here - confirm
			// against BaseHighlightedField before relying on either behaviour.
			$self->setOptions( [
				'top_scoring' => true,
				'boost_before' => [
					// Note these values are super arbitrary right now.
					'20' => 2,
					'50' => 1.8,
					'200' => 1.5,
					'1000' => 1.2,
				],
				// We should set a limit on the number of fragments we try because if we
				// don't then we'll hit really crazy documents, say 10MB of "d d". This'll
				// keep us from scanning more than the first couple thousand of them.
				// Setting this too low (like 50) can bury good snippets if the search
				// contains common words.
				'max_fragments_scored' => 5000,
			] );
			return $self;
		};
	}

	/**
	 * Factory for main text fields: same configuration as text(), plus a
	 * no-match size taken from the CirrusSearchFragmentSize setting.
	 *
	 * @return callable
	 */
	protected static function mainText(): callable {
		// Build the underlying text() factory once instead of on every invocation.
		$textFactory = self::text();
		return static function ( SearchConfig $config, $fieldName, $target, $priority ) use ( $textFactory ) {
			/** @var BaseHighlightedField $self */
			$self = $textFactory( $config, $fieldName, $target, $priority );
			$self->setNoMatchSize( $config->get( 'CirrusSearchFragmentSize' ) );
			return $self;
		};
	}

	/**
	 * Build a field for regex highlighting on top of the text() configuration.
	 *
	 * The pattern is stored as a one-element list under the 'regex' option so
	 * that merge() can append the patterns of another regex field.
	 *
	 * @param SearchConfig $config
	 * @param string $name
	 * @param string $target
	 * @param string $pattern
	 * @param bool $caseInsensitive
	 * @param int $priority
	 * @return self
	 */
	public static function newRegexField(
		SearchConfig $config,
		$name,
		$target,
		$pattern,
		$caseInsensitive,
		$priority
	): self {
		// TODO: verify that we actually need to have all the text() options when running a regex
		/** @var self $self */
		$self = ( self::text() )( $config, $name, $target, $priority );
		$self->addOption( 'regex', [ $pattern ] );
		$self->addOption( 'locale', $config->get( 'LanguageCode' ) );
		$self->addOption( 'regex_flavor', 'lucene' );
		$self->addOption( 'skip_query', true );
		$self->addOption( 'regex_case_insensitive', $caseInsensitive );
		$self->addOption( 'max_determinized_states', $config->get( 'CirrusSearchRegexMaxDeterminizedStates' ) );
		// Strict comparison: only this exact field name gets a no-match size.
		if ( $name === 'source_text.plain' ) {
			$self->setNoMatchSize( $config->get( 'CirrusSearchFragmentSize' ) );
		}
		return $self;
	}

	/**
	 * When both fields carry a 'regex' option and share the same field name,
	 * append the other field's patterns to this one and OR the
	 * case-insensitivity flags; otherwise defer to the parent implementation.
	 *
	 * @inheritDoc
	 */
	public function merge( HighlightedField $other ): HighlightedField {
		if ( isset( $this->options['regex'] ) &&
			$other instanceof ExperimentalHighlightedFieldBuilder &&
			isset( $other->options['regex'] ) &&
			$this->getFieldName() === $other->getFieldName()
		) {
			$this->options['regex'] = array_merge( $this->options['regex'], $other->options['regex'] );
			// A 'regex' option may have been added without the case flag; treat a
			// missing flag as false rather than reading an undefined array key.
			$mergedInsensitivity = ( $this->options['regex_case_insensitive'] ?? false ) ||
				( $other->options['regex_case_insensitive'] ?? false );
			$this->options['regex_case_insensitive'] = $mergedInsensitivity;
			return $this;
		} else {
			return parent::merge( $other );
		}
	}

	/**
	 * Set the 'skip_if_last_matched' option on this field.
	 *
	 * @return ExperimentalHighlightedFieldBuilder this instance, for chaining
	 */
	public function skipIfLastMatched(): BaseHighlightedField {
		$this->addOption( 'skip_if_last_matched', true );
		return $this;
	}

	/**
	 * Factories provided by this class, keyed by query type and then by field
	 * name.
	 *
	 * @return array
	 */
	public static function getFactories() {
		return [
			SearchQuery::SEARCH_TEXT => [
				'title' => self::entireValue(),
				'redirect.title' => self::redirectAndHeadings(),
				'category' => self::redirectAndHeadings(),
				'heading' => self::redirectAndHeadings(),
				'text' => self::mainText(),
				'source_text.plain' => self::mainText(),
				'auxiliary_text' => self::text(),
				'file_text' => self::text(),
			]
		];
	}
}