1 package org.wikimedia.search.extra.regex.ngram;
2
3 import org.apache.lucene.analysis.Analyzer;
4 import org.apache.lucene.util.automaton.Automaton;
5 import org.wikimedia.search.extra.regex.expression.Expression;
6 import org.wikimedia.search.extra.regex.expression.True;
7
8
9
10
11 public class NGramExtractor {
12 private final int gramSize;
13 private final int maxExpand;
14 private final int maxStatesTraced;
15 private final int maxNgrams;
16 private final Analyzer ngramAnalyzer;
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33 public NGramExtractor(int gramSize, int maxExpand, int maxStatesTraced, int maxNgrams, Analyzer ngramAnalyzer) {
34 this.gramSize = gramSize;
35 this.maxExpand = maxExpand;
36 this.maxStatesTraced = maxStatesTraced;
37 this.maxNgrams = maxNgrams;
38 this.ngramAnalyzer = ngramAnalyzer;
39 }
40
41
42
43
44 public Expression<String> extract(Automaton automaton) {
45 if (automaton.isAccept(0)) {
46 return True.instance();
47 }
48 return new NGramAutomaton(automaton, gramSize, maxExpand, maxStatesTraced, maxNgrams, ngramAnalyzer).expression().simplify();
49 }
50 }