1 package org.wikimedia.search.extra.regex;
2
3 import static org.junit.Assert.assertFalse;
4 import static org.junit.Assert.assertTrue;
5
6 import java.io.IOException;
7 import java.util.Locale;
8
9 import org.apache.logging.log4j.LogManager;
10 import org.apache.logging.log4j.Logger;
11 import org.junit.Test;
12 import org.wikimedia.search.extra.regex.SourceRegexQuery.NonBacktrackingOnTheFlyCaseConvertingRechecker;
13 import org.wikimedia.search.extra.regex.SourceRegexQuery.NonBacktrackingRechecker;
14 import org.wikimedia.search.extra.regex.SourceRegexQuery.Rechecker;
15 import org.wikimedia.search.extra.regex.SourceRegexQuery.SlowRechecker;
16 import org.wikimedia.search.extra.regex.SourceRegexQueryBuilder.Settings;
17
18 import com.google.common.base.Charsets;
19 import com.google.common.collect.ImmutableList;
20 import com.google.common.io.Resources;
21
22 public class SourceRegexQueryRecheckTest {
23 private static final Logger LOG = LogManager.getLogger(SourceRegexQueryRecheckTest.class.getPackage().getName());
24
25 private final String rashidun;
26 private final String obama;
27
28 public SourceRegexQueryRecheckTest() throws IOException {
29 rashidun = Resources.toString(Resources.getResource("Rashidun Caliphate.txt"), Charsets.UTF_8);
30 obama = Resources.toString(Resources.getResource("Barack Obama.txt"), Charsets.UTF_8);
31 }
32
33 @Test
34 public void insensitiveNoMatch() {
35 Settings settings = new Settings();
36
37
38 many("case insensitive", ".*does not match anything", settings, 1000, false);
39 }
40
41 @Test
42 public void sensitiveNoMatch() {
43 Settings settings = new Settings();
44 settings.caseSensitive(true);
45 many("case sensitive", ".*does not match anything", settings, 1000, false);
46 }
47
48 @Test
49 public void insensitiveShortRegex() {
50 Settings settings = new Settings();
51 many("case insensitive", "cat", settings, 1000, true);
52 }
53
54 @Test
55 public void sensitiveShortRegex() {
56 Settings settings = new Settings();
57 settings.caseSensitive(true);
58 many("case sensitive", "cat", settings, 1000, true);
59 }
60
61 @Test
62 public void insensitiveLongerRegex() {
63 Settings settings = new Settings();
64 many("case insensitive", "\\[\\[Category:", settings, 1000, true);
65 }
66
67 @Test
68 public void sensitiveLongerRegex() {
69 Settings settings = new Settings();
70 settings.caseSensitive(true);
71 many("case sensitive", "\\[\\[Category:", settings, 1000, true);
72 }
73
74 @Test
75 public void insensitiveBacktrackyRegex() {
76 Settings settings = new Settings();
77 settings.caseSensitive(true);
78 many("case sensitive", "days.+and", settings, 1000, true);
79 }
80
81 @Test
82 public void sensitiveBacktrackyRegex() {
83 Settings settings = new Settings();
84 many("case sensitive", "days.+and", settings, 1000, true);
85 }
86
87 private void many(String name, String regex, Settings settings, int times, boolean isMatching) {
88 long slow = manyTestCase(new SlowRechecker(regex, settings), "slow", name, settings, times, regex, isMatching);
89 long nonBacktracking = manyTestCase(new NonBacktrackingRechecker(regex, settings), "non backtracking", name, settings, times, regex, isMatching);
90 if (!settings.caseSensitive()) {
91 long nonBacktrackingCaseConverting = manyTestCase(new NonBacktrackingOnTheFlyCaseConvertingRechecker(regex, settings),
92 "case converting", name, settings, times, regex, isMatching);
93 }
94 }
95
96 private long manyTestCase(Rechecker rechecker, String recheckerName, String name, Settings settings, int times, String regex, boolean isMatching) {
97 long start = System.currentTimeMillis();
98 for (int i = 0; i < times; i++) {
99 if (isMatching) {
100 assertTrue(rechecker.recheck(ImmutableList.of(rashidun)));
101 assertTrue(rechecker.recheck(ImmutableList.of(obama)));
102 } else {
103 assertFalse(rechecker.recheck(ImmutableList.of(rashidun)));
104 assertFalse(rechecker.recheck(ImmutableList.of(obama)));
105 }
106 }
107 long took = System.currentTimeMillis() - start;
108 LOG.info("{} took {} millis to match /{}/", String.format(Locale.ROOT, "%20s %10s", recheckerName, name), took, regex);
109 return took;
110 }
111 }