View Javadoc
1   package org.wikimedia.search.extra.regex;
2   
3   import static org.junit.Assert.assertFalse;
4   import static org.junit.Assert.assertTrue;
5   
6   import java.io.IOException;
7   import java.util.Locale;
8   
9   import org.apache.logging.log4j.LogManager;
10  import org.apache.logging.log4j.Logger;
11  import org.junit.Test;
12  import org.wikimedia.search.extra.regex.SourceRegexQuery.NonBacktrackingOnTheFlyCaseConvertingRechecker;
13  import org.wikimedia.search.extra.regex.SourceRegexQuery.NonBacktrackingRechecker;
14  import org.wikimedia.search.extra.regex.SourceRegexQuery.Rechecker;
15  import org.wikimedia.search.extra.regex.SourceRegexQuery.SlowRechecker;
16  import org.wikimedia.search.extra.regex.SourceRegexQueryBuilder.Settings;
17  
18  import com.google.common.base.Charsets;
19  import com.google.common.collect.ImmutableList;
20  import com.google.common.io.Resources;
21  
22  public class SourceRegexQueryRecheckTest {
23      private static final Logger LOG = LogManager.getLogger(SourceRegexQueryRecheckTest.class.getPackage().getName());
24  
25      private final String rashidun;
26      private final String obama;
27  
28      public SourceRegexQueryRecheckTest() throws IOException {
29          rashidun = Resources.toString(Resources.getResource("Rashidun Caliphate.txt"), Charsets.UTF_8);
30          obama = Resources.toString(Resources.getResource("Barack Obama.txt"), Charsets.UTF_8);
31      }
32  
33      @Test
34      public void insensitiveNoMatch() {
35          Settings settings = new Settings();
36          // This was a pathological case, taking more than a minute for a single iteration.
37          // Solved by prefixing .* to all regex's and making a single pass through the source.
38          many("case insensitive", ".*does not match anything", settings, 1000, false);
39      }
40  
41      @Test
42      public void sensitiveNoMatch() {
43          Settings settings = new Settings();
44          settings.caseSensitive(true);
45          many("case sensitive", ".*does not match anything", settings, 1000, false);
46      }
47  
48      @Test
49      public void insensitiveShortRegex() {
50          Settings settings = new Settings();
51          many("case insensitive", "cat", settings, 1000, true);
52      }
53  
54      @Test
55      public void sensitiveShortRegex() {
56          Settings settings = new Settings();
57          settings.caseSensitive(true);
58          many("case sensitive", "cat", settings, 1000, true);
59      }
60  
61      @Test
62      public void insensitiveLongerRegex() {
63          Settings settings = new Settings();
64          many("case insensitive", "\\[\\[Category:", settings, 1000, true);
65      }
66  
67      @Test
68      public void sensitiveLongerRegex() {
69          Settings settings = new Settings();
70          settings.caseSensitive(true);
71          many("case sensitive", "\\[\\[Category:", settings, 1000, true);
72      }
73  
74      @Test
75      public void insensitiveBacktrackyRegex() {
76          Settings settings = new Settings();
77          settings.caseSensitive(true);
78          many("case sensitive", "days.+and", settings, 1000, true);
79      }
80  
81      @Test
82      public void sensitiveBacktrackyRegex() {
83          Settings settings = new Settings();
84          many("case sensitive", "days.+and", settings, 1000, true);
85      }
86  
87      private void many(String name, String regex, Settings settings, int times, boolean isMatching) {
88          long slow = manyTestCase(new SlowRechecker(regex, settings), "slow", name, settings, times, regex, isMatching);
89          long nonBacktracking = manyTestCase(new NonBacktrackingRechecker(regex, settings), "non backtracking", name, settings, times, regex, isMatching);
90          if (!settings.caseSensitive()) {
91              long nonBacktrackingCaseConverting = manyTestCase(new NonBacktrackingOnTheFlyCaseConvertingRechecker(regex, settings),
92                      "case converting", name, settings, times, regex, isMatching);
93          }
94      }
95  
96      private long manyTestCase(Rechecker rechecker, String recheckerName, String name, Settings settings, int times, String regex, boolean isMatching) {
97          long start = System.currentTimeMillis();
98          for (int i = 0; i < times; i++) {
99              if (isMatching) {
100                 assertTrue(rechecker.recheck(ImmutableList.of(rashidun)));
101                 assertTrue(rechecker.recheck(ImmutableList.of(obama)));
102             } else {
103                 assertFalse(rechecker.recheck(ImmutableList.of(rashidun)));
104                 assertFalse(rechecker.recheck(ImmutableList.of(obama)));
105             }
106         }
107         long took = System.currentTimeMillis() - start;
108         LOG.info("{} took {} millis to match /{}/", String.format(Locale.ROOT, "%20s %10s", recheckerName, name), took, regex);
109         return took;
110     }
111 }