1
2
3
4
5
6
7
8
9
10
11
12
13
14 package org.wikimedia.search.extra.analysis.textify;
15
16 import static java.util.Collections.unmodifiableMap;
17
18 import java.io.IOException;
19 import java.io.Reader;
20 import java.util.Map;
21
22 import org.apache.lucene.analysis.charfilter.BaseCharFilter;
23
24 public class LimitedMappingCharFilter extends BaseCharFilter {
25
26 private final Map<Integer, Integer> oneCharMap;
27 private int outputCharCount;
28 private int cumulativeOffset;
29
30 LimitedMappingCharFilter(Map<Integer, Integer> map, Reader in) {
31 super(in);
32
33 for (Map.Entry<Integer, Integer> pair : map.entrySet()) {
34 if (pair.getKey() < 0 || pair.getKey() > 0xFFFF) {
35 throw new IllegalArgumentException("mapping keys must be between 0 and 0xFFFF");
36 }
37 if (pair.getValue() < -1 || pair.getValue() > 0xFFFF) {
38 throw new IllegalArgumentException("mapping values must be between -1 and 0xFFFF");
39 }
40 }
41
42 oneCharMap = unmodifiableMap(map);
43 }
44
45 @Override
46 public int read() throws IOException {
47 int c = 0;
48 boolean doneReading = false;
49
50 while (!doneReading) {
51 c = input.read();
52 if (c == -1) {
53
54 return c;
55 }
56 c = oneCharMap.getOrDefault(c, c);
57 if (c == -1) {
58
59 cumulativeOffset++;
60 addOffCorrectMap(outputCharCount, cumulativeOffset);
61 } else {
62 doneReading = true;
63 }
64 }
65 outputCharCount++;
66 return c;
67 }
68
69 @Override
70 public int read(char[] cbuf, int offset, int len) throws IOException {
71 int charsRead = 0;
72 for (int i = offset; i < offset + len; i++) {
73 int c = read();
74 if (c == -1) {
75 break;
76 }
77 cbuf[i] = (char) c;
78 charsRead++;
79 }
80
81 return charsRead == 0 && len > 0 ? -1 : charsRead;
82 }
83
84 @Override
85 public void reset() throws IOException {
86 input.reset();
87 outputCharCount = 0;
88 cumulativeOffset = 0;
89 }
90
91 }