package org.wikimedia.search.extra.analysis.ukrainian;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.net.URL;

import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

import morfologik.stemming.Dictionary;

/**
 * Token filter factory that wraps incoming token streams in a
 * {@link UkrainianStemmerFilter} backed by a dictionary loaded from the classpath.
 */
public class UkrainianStemmerFilterFactory extends AbstractTokenFilterFactory {

    /** Classpath location of the Ukrainian stemming dictionary. */
    private static final String DICT_RESOURCE = "ua/net/nlp/ukrainian.dict";

    /**
     * Stemming dictionary, read once when this class is initialized and handed
     * to every filter this factory creates.
     */
    protected static final Dictionary UK_DICT = getStemmingDict();

    /**
     * Creates the factory.
     *
     * @param indexSettings settings of the index, forwarded to the superclass
     * @param env           node environment; not used by this factory
     * @param name          name of the token filter, forwarded to the superclass
     * @param settings      filter settings, forwarded to the superclass
     */
    public UkrainianStemmerFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
        super(indexSettings, name, settings);
    }

    /**
     * Wraps the given stream in a Ukrainian stemmer filter.
     *
     * @param tokenStream the stream to filter
     * @return a {@link UkrainianStemmerFilter} reading from {@code tokenStream}
     */
    @Override
    public TokenStream create(TokenStream tokenStream) {
        return new UkrainianStemmerFilter(tokenStream, UK_DICT);
    }

    /**
     * Loads the stemming dictionary from the classpath resource {@value #DICT_RESOURCE}.
     *
     * @return the dictionary read from the resource
     * @throws IllegalStateException if the resource is not present on the classpath
     * @throws UncheckedIOException  if reading the dictionary fails
     */
    private static Dictionary getStemmingDict() {
        URL dictUrl = UkrainianStemmerFilterFactory.class.getClassLoader().getResource(DICT_RESOURCE);
        if (dictUrl == null) {
            // getResource returns null for a missing resource; fail here with the
            // resource name rather than passing null on to the dictionary reader.
            throw new IllegalStateException(
                "Ukrainian stemmer dictionary resource not found on classpath: " + DICT_RESOURCE);
        }
        try {
            return Dictionary.read(dictUrl);
        } catch (IOException e) {
            throw new UncheckedIOException("Could not load the Ukrainian stemmer dictionary.", e);
        }
    }

}