View Javadoc
1   package org.wikimedia.search.extra.regex.expression;
2   
3   import java.util.HashSet;
4   import java.util.Set;
5   
6   import com.google.common.collect.ImmutableSet;
7   
8   /**
9    * Boolean expression rewriter.
10   *
11   * @param <T> type holding leaves
12   */
13  public class ExpressionRewriter<T> {
14      public static final int MAX_BOOLEAN_CLAUSES = 1024;
15  
16      private final Expression<T> expression;
17  
18      public ExpressionRewriter(Expression<T> expression) {
19          this.expression = expression;
20      }
21  
22      public Expression<T> degradeAsDisjunction() {
23          return degradeAsDisjunction(MAX_BOOLEAN_CLAUSES);
24      }
25  
26      /**
27       * Degrade the boolean expression as a single disjunction with all the
28       * leaves. This expression is a super-set of the original expression
29       * (because we do not support negation).
30       *
31       * @return a flat disjunction expression
32       */
33      public Expression<T> degradeAsDisjunction(int maxResultingClauses) {
34          Set<Expression<T>> leaves = new HashSet<>();
35          Set<Expression<T>> visited = new HashSet<>();
36          if (!extractLeaves(expression, leaves, visited, maxResultingClauses)) {
37              return True.instance();
38          }
39          for (Expression<T> expression : leaves) {
40              if (expression.alwaysTrue()) {
41                  return True.instance();
42              }
43          }
44          return new Or<>(ImmutableSet.copyOf(leaves));
45      }
46  
47      private boolean extractLeaves(Expression<T> subExpr, Set<Expression<T>> leaves, Set<Expression<T>> visited, int maxResultingClauses) {
48          if (subExpr.isComposite()) {
49              for (Expression<T> exp : (AbstractCompositeExpression<T>) subExpr) {
50                  // NGramExtractor may generate a graph that reuses its branches
51                  // We just need to extract the leaves so there's no need
52                  // to visit the same branch twice.
53                  if (!visited.add(exp)) {
54                      continue;
55                  }
56                  if (!extractLeaves(exp, leaves, visited, maxResultingClauses)) {
57                      return false;
58                  }
59              }
60              return true;
61          } else {
62              if (leaves.add(subExpr)) {
63                  return leaves.size() <= maxResultingClauses;
64              }
65              return true;
66          }
67      }
68  }