Normalizers.java

package org.wikimedia.search.glent.analysis;

import java.util.function.Function;

import com.beust.jcommander.converters.BaseConverter;

/**
 * JCommander converter that resolves a normalizer name to the matching
 * {@code Function<String, String>}. The same normalizers are also available
 * directly through the static factory methods of this class.
 */
public class Normalizers extends BaseConverter<Function<String, String>> {
    /**
     * Creates the converter.
     *
     * @param s value forwarded unchanged to the {@link BaseConverter} constructor
     */
    public Normalizers(String s) {
        super(s);
    }

    /**
     * @return a new {@code LowerCaseNormalizer}
     */
    public static Function<String, String> lowerCase() {
        return new LowerCaseNormalizer();
    }

    /**
     * @return a new {@code CirrusNearMatchNormalizer}
     */
    public static Function<String, String> cirrusNearMatch() {
        return new CirrusNearMatchNormalizer();
    }

    /**
     * @return a new {@code GlentRetokenizer} built around {@code Tokenizers.icu()}
     */
    public static Function<String, String> icuTokNorm() {
        return new GlentRetokenizer(Tokenizers.icu());
    }

    /**
     * Looks up the normalizer registered under the given name. Names are
     * matched exactly (case-sensitive); the recognized names are
     * {@code "lowercase"}, {@code "cirrusnearmatch"} and {@code "icutoknorm"}.
     * A fresh normalizer is created on every call.
     *
     * @param s name of the requested normalizer
     * @return the normalizer for that name
     * @throws IllegalArgumentException if the name is not one of the recognized names
     * @throws NullPointerException if {@code s} is {@code null}
     */
    @Override
    public Function<String, String> convert(String s) {
        // Call equals on s itself (not on the literal) so that a null name
        // still fails with a NullPointerException rather than falling through.
        if (s.equals("lowercase")) {
            return lowerCase();
        }
        if (s.equals("cirrusnearmatch")) {
            return cirrusNearMatch();
        }
        if (s.equals("icutoknorm")) {
            return icuTokNorm();
        }
        throw new IllegalArgumentException("Unknown normalizer [" + s + "]");
    }
}