1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
package edu.caltech.nanodb.expressions;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import edu.caltech.nanodb.relations.ColumnInfo;
import edu.caltech.nanodb.relations.ColumnType;
import edu.caltech.nanodb.relations.Schema;
import edu.caltech.nanodb.relations.SchemaNameException;
/**
* <p>
* This class implements string matching operations. The supported operations
* are:
* </p>
* <ul>
* <li>basic SQL pattern matching, <tt>a [NOT] LIKE b</tt></li>
* <li>regex pattern matching, <tt>a [NOT] SIMILAR TO b</tt></li>
* </ul>
* <p>
* The <tt>a NOT LIKE ...</tt> and <tt>a NOT SIMILAR TO ...</tt> operation is
* translated into <tt>NOT (a LIKE ...)</tt> etc. by the parser.
* </p>
*/
public class StringMatchOperator extends Expression {
/** A logging object for reporting anything interesting that happens. **/
private static Logger logger = LogManager.getLogger(StringMatchOperator.class);
/**
* This enumeration specifies the types of matching that can be performed.
*/
public enum Type {
LIKE("LIKE"),
REGEX("SIMILAR TO");
/** The string representation for each operator. Used for printing. */
private final String stringRep;
/**
* Construct a Type enum with the specified string representation.
*
* @param rep the string representation of the comparison type
*/
Type(String rep) {
stringRep = rep;
}
/**
* Accessor for the operator type's string representation.
*
* @return the string representation of the comparison type
*/
public String stringRep() {
return stringRep;
}
}
/** The kind of comparison, such as "LIKE" or "SIMILAR TO." */
Type type;
/** The left expression in the comparison. */
Expression leftExpr;
/** The right expression in the comparison. */
Expression rightExpr;
/**
* If this is false, the operator computes <tt>IS LIKE</tt>; if true,
* the operator computes <tt>IS NOT LIKE</tt>.
*/
private boolean invert;
public StringMatchOperator(Type type, Expression lhs, Expression rhs) {
if (type == null || lhs == null || rhs == null)
throw new NullPointerException();
this.type = type;
leftExpr = lhs;
rightExpr = rhs;
}
public void setInvert(boolean invert) {
this.invert = invert;
}
public ColumnInfo getColumnInfo(Schema schema) throws SchemaNameException {
// Comparisons always return Boolean values, so just pass a Boolean
// value in to the TypeConverter to get out the corresponding SQL type.
ColumnType colType =
new ColumnType(TypeConverter.getSQLType(Boolean.FALSE));
return new ColumnInfo(colType);
}
/**
* This helper method converts a SQL <tt>LIKE</tt> match-expression into
* a corresponding regular expression. The challenge is that the match
* expression might contain regular-expression characters, so characters
* other than "%" and "_" must be quoted so they don't cause any effects.
*
* @param sqlLike the SQL <tt>LIKE</tt> match-expression
*
* @return a regular expression corresponding to the match-expression
*/
private String sqlLikeToRegex(String sqlLike) {
StringBuilder buf = new StringBuilder();
boolean inText = false;
for (int i = 0; i < sqlLike.length(); i++) {
char ch = sqlLike.charAt(i);
if (ch == '%') {
if (inText)
buf.append("\\E");
buf.append(".*");
inText = false;
}
else if (ch == '_') {
if (inText)
buf.append("\\E");
buf.append(".");
inText = false;
}
else {
if (!inText)
buf.append("\\Q");
buf.append(ch);
inText = true;
}
}
if (inText)
buf.append("\\E");
return buf.toString();
}
/**
* Evaluates this comparison expression and returns either
* {@link java.lang.Boolean#TRUE} or {@link java.lang.Boolean#FALSE}. If
* either the left-hand or right-hand expression evaluates to
* <code>null</code> (representing the SQL <tt>NULL</tt> value), then the
* expression's result is always <code>FALSE</code>.
*/
public Object evaluate(Environment env) throws ExpressionException {
// Evaluate the left and right subexpressions, and coerce them into
// strings.
String lhsValue = TypeConverter.getStringValue(leftExpr.evaluate(env));
String rhsValue = TypeConverter.getStringValue(rightExpr.evaluate(env));
// If either the LHS value or RHS value is NULL (represented by Java
// null value) then the entire expression evaluates to FALSE.
if (lhsValue == null || rhsValue == null)
return null;
boolean result;
switch (type) {
case LIKE:
// Convert the LIKE pattern into a regular expression. First,
// escape all the characters that could be meaningful in
// regex, and then replace the SQL wildcard expressions with
// corresponding regex expressions.
String rhsRegex = sqlLikeToRegex(rhsValue);
rhsRegex = rhsRegex.replaceAll("_", ".").replaceAll("%", ".*");
logger.info("Received LIKE pattern \"" + rhsValue +
"\", converted to regex \"" + rhsRegex + "\".");
result = Pattern.matches(rhsRegex, lhsValue);
break;
case REGEX:
try {
result = Pattern.matches(rhsValue, lhsValue);
}
catch (PatternSyntaxException e) {
throw new ExpressionException("Regex pattern \"" +
rhsValue + "\" is invalid!", e);
}
break;
default:
throw new ExpressionException(
"Unrecognized string-matching type " + type);
}
if (invert)
result = !result;
return result;
}
@Override
public Expression traverse(ExpressionProcessor p) {
p.enter(this);
leftExpr = leftExpr.traverse(p);
rightExpr = rightExpr.traverse(p);
return p.leave(this);
}
/**
* Returns a string representation of this comparison expression and its
* subexpressions.
*/
public String toString() {
// Convert all of the components into string representations.
String leftStr = leftExpr.toString();
String rightStr = rightExpr.toString();
String opStr = " " + type.stringRep() + " ";
if (invert)
opStr = " NOT" + opStr;
// For now, assume we don't need parentheses.
return leftStr + opStr + rightStr;
}
/**
* Returns the type of this comparison operator.
*
* @return the type of comparison
*/
public Type getType() {
return type;
}
/**
* Returns the left expression.
*
* @return the left expression
*/
public Expression getLeftExpression() {
return leftExpr;
}
/**
* Returns the right expression.
*
* @return the right expression
*/
public Expression getRightExpression() {
return rightExpr;
}
/**
* Checks if the argument is an expression with the same structure, but not
* necessarily the same references.
*
* @param obj the object to which we are comparing
*/
@Override
public boolean equals(Object obj) {
if (obj instanceof StringMatchOperator) {
StringMatchOperator other = (StringMatchOperator) obj;
return (type == other.type && invert == other.invert &&
leftExpr.equals(other.leftExpr) &&
rightExpr.equals(other.rightExpr));
}
return false;
}
/**
* Computes the hashcode of an Expression. This method is used to see if
* two expressions might be equal.
*/
@Override
public int hashCode() {
int hash = 7;
hash = 31 * hash + type.hashCode();
hash = 31 * hash + Boolean.hashCode(invert);
hash = 31 * hash + leftExpr.hashCode();
hash = 31 * hash + rightExpr.hashCode();
return hash;
}
/**
* Creates a copy of expression. This method is used by the
* {@link Expression#duplicate} method to make a deep copy of an expression
* tree.
*/
@Override
protected Object clone() throws CloneNotSupportedException {
StringMatchOperator expr = (StringMatchOperator) super.clone();
// Type is immutable; copy it.
expr.type = type;
// Clone the subexpressions
expr.leftExpr = (Expression) leftExpr.clone();
expr.rightExpr = (Expression) rightExpr.clone();
return expr;
}
}