1 package org.wikimedia.search.extra.analysis.turkish;
2
3
4
5 import java.io.IOException;
6
7 import org.apache.lucene.analysis.Analyzer;
8 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
9 import org.apache.lucene.analysis.TokenStream;
10 import org.apache.lucene.analysis.Tokenizer;
11 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
12 import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
13 import org.junit.Test;
14
15 public class BetterApostropheFilterTest extends BaseTokenStreamTestCase {
16
17 @Test
18 public void simpleTest() throws IOException {
19 String input = "Wikipedia'nın sunucuları ABD’de";
20 try (Analyzer ws = newBetterApostrophe()) {
21 TokenStream ts = ws.tokenStream("", input);
22 assertTokenStreamContents(ts,
23 new String[]{"wikipedia", "sunucuları", "abd"},
24 new int[]{0, 14, 25},
25 new int[]{13, 24, 31},
26 null,
27 new int[]{1, 1, 1},
28 null,
29 31,
30 null,
31 true);
32 }
33 }
34
35 private Analyzer newBetterApostrophe() {
36 return new Analyzer() {
37 @Override
38 protected TokenStreamComponents createComponents(String fieldName) {
39 Tokenizer tok = new WhitespaceTokenizer();
40 TokenStream ts = new TurkishLowerCaseFilter(tok);
41 ts = new BetterApostropheFilter(ts);
42 return new TokenStreamComponents(tok, ts);
43 }
44 };
45 }
46
47 }