1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
package edu.caltech.nanodb.queryeval;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import edu.caltech.nanodb.expressions.Expression;
import edu.caltech.nanodb.plannodes.FileScanNode;
import edu.caltech.nanodb.plannodes.PlanNode;
import edu.caltech.nanodb.plannodes.SelectNode;
import edu.caltech.nanodb.queryast.FromClause;
import edu.caltech.nanodb.queryast.SelectClause;
import edu.caltech.nanodb.relations.TableInfo;
import edu.caltech.nanodb.storage.StorageManager;
/**
* This planner implementation uses dynamic programming to devise an optimal
* join strategy for the query. As always, queries are optimized in units of
* <tt>SELECT</tt>-<tt>FROM</tt>-<tt>WHERE</tt> subqueries; optimizations
* don't currently span multiple subqueries.
*/
public class CostBasedJoinPlanner implements Planner {
/** A logging object for reporting anything interesting that happens. */
private static Logger logger = LogManager.getLogger(
CostBasedJoinPlanner.class);
/** The storage manager used during query planning. */
protected StorageManager storageManager;
/** Sets the server to be used during query planning. */
public void setStorageManager(StorageManager storageManager) {
if (storageManager == null)
throw new IllegalArgumentException("storageManager cannot be null");
this.storageManager = storageManager;
}
/**
* This helper class is used to keep track of one "join component" in the
* dynamic programming algorithm. A join component is simply a query plan
* for joining one or more leaves of the query.
* <p>
* In this context, a "leaf" may either be a base table or a subquery in
* the <tt>FROM</tt>-clause of the query. However, the planner will
* attempt to push conjuncts down the plan as far as possible, so even if
* a leaf is a base table, the plan may be a bit more complex than just a
* single file-scan.
*/
private static class JoinComponent {
/**
* This is the join plan itself, that joins together all leaves
* specified in the {@link #leavesUsed} field.
*/
public PlanNode joinPlan;
/**
* This field specifies the collection of leaf-plans that are joined by
* the plan in this join-component.
*/
public HashSet<PlanNode> leavesUsed;
/**
* This field specifies the collection of all conjuncts use by this join
* plan. It allows us to easily determine what join conjuncts still
* remain to be incorporated into the query.
*/
public HashSet<Expression> conjunctsUsed;
/**
* Constructs a new instance for a <em>leaf node</em>. It should not
* be used for join-plans that join together two or more leaves. This
* constructor simply adds the leaf-plan into the {@link #leavesUsed}
* collection.
*
* @param leafPlan the query plan for this leaf of the query.
*
* @param conjunctsUsed the set of conjuncts used by the leaf plan.
* This may be an empty set if no conjuncts apply solely to
* this leaf, or it may be nonempty if some conjuncts apply
* solely to this leaf.
*/
public JoinComponent(PlanNode leafPlan, HashSet<Expression> conjunctsUsed) {
leavesUsed = new HashSet<>();
leavesUsed.add(leafPlan);
joinPlan = leafPlan;
this.conjunctsUsed = conjunctsUsed;
}
/**
* Constructs a new instance for a <em>non-leaf node</em>. It should
* not be used for leaf plans!
*
* @param joinPlan the query plan that joins together all leaves
* specified in the <tt>leavesUsed</tt> argument.
*
* @param leavesUsed the set of two or more leaf plans that are joined
* together by the join plan.
*
* @param conjunctsUsed the set of conjuncts used by the join plan.
* Obviously, it is expected that all conjuncts specified here
* can actually be evaluated against the join plan.
*/
public JoinComponent(PlanNode joinPlan, HashSet<PlanNode> leavesUsed,
HashSet<Expression> conjunctsUsed) {
this.joinPlan = joinPlan;
this.leavesUsed = leavesUsed;
this.conjunctsUsed = conjunctsUsed;
}
}
/**
* Returns the root of a plan tree suitable for executing the specified
* query.
*
* @param selClause an object describing the query to be performed
*
* @return a plan tree for executing the specified query
*/
public PlanNode makePlan(SelectClause selClause,
List<SelectClause> enclosingSelects) {
// TODO: Implement!
//
// This is a very rough sketch of how this function will work,
// focusing mainly on join planning:
//
// 1) Pull out the top-level conjuncts from the FROM and WHERE
// clauses on the query, since we will handle them in special ways
// if we have outer joins.
//
// 2) Create an optimal join plan from the top-level from-clause and
// the top-level conjuncts.
//
// 3) If there are any unused conjuncts, determine how to handle them.
//
// 4) Create a project plan-node if necessary.
//
// 5) Handle other clauses such as ORDER BY, LIMIT/OFFSET, etc.
//
// Supporting other query features, such as grouping/aggregation,
// various kinds of subqueries, queries without a FROM clause, etc.,
// can all be incorporated into this sketch relatively easily.
return null;
}
/**
* Given the top-level {@code FromClause} for a SELECT-FROM-WHERE block,
* this helper generates an optimal join plan for the {@code FromClause}.
*
* @param fromClause the top-level {@code FromClause} of a
* SELECT-FROM-WHERE block.
* @param extraConjuncts any extra conjuncts (e.g. from the WHERE clause,
* or HAVING clause)
* @return a {@code JoinComponent} object that represents the optimal plan
* corresponding to the FROM-clause
*/
private JoinComponent makeJoinPlan(FromClause fromClause,
Collection<Expression> extraConjuncts) {
// These variables receive the leaf-clauses and join conjuncts found
// from scanning the sub-clauses. Initially, we put the extra conjuncts
// into the collection of conjuncts.
HashSet<Expression> conjuncts = new HashSet<>();
ArrayList<FromClause> leafFromClauses = new ArrayList<>();
collectDetails(fromClause, conjuncts, leafFromClauses);
logger.debug("Making join-plan for " + fromClause);
logger.debug(" Collected conjuncts: " + conjuncts);
logger.debug(" Collected FROM-clauses: " + leafFromClauses);
logger.debug(" Extra conjuncts: " + extraConjuncts);
if (extraConjuncts != null)
conjuncts.addAll(extraConjuncts);
// Make a read-only set of the input conjuncts, to avoid bugs due to
// unintended side-effects.
Set<Expression> roConjuncts = Collections.unmodifiableSet(conjuncts);
// Create a subplan for every single leaf FROM-clause, and prepare the
// leaf-plan.
logger.debug("Generating plans for all leaves");
ArrayList<JoinComponent> leafComponents = generateLeafJoinComponents(
leafFromClauses, roConjuncts);
// Print out the results, for debugging purposes.
if (logger.isDebugEnabled()) {
for (JoinComponent leaf : leafComponents) {
logger.debug(" Leaf plan:\n" +
PlanNode.printNodeTreeToString(leaf.joinPlan, true));
}
}
// Build up the full query-plan using a dynamic programming approach.
JoinComponent optimalJoin =
generateOptimalJoin(leafComponents, roConjuncts);
PlanNode plan = optimalJoin.joinPlan;
logger.info("Optimal join plan generated:\n" +
PlanNode.printNodeTreeToString(plan, true));
return optimalJoin;
}
/**
* This helper method pulls the essential details for join optimization
* out of a <tt>FROM</tt> clause.
*
* TODO: FILL IN DETAILS.
*
* @param fromClause the from-clause to collect details from
*
* @param conjuncts the collection to add all conjuncts to
*
* @param leafFromClauses the collection to add all leaf from-clauses to
*/
private void collectDetails(FromClause fromClause,
HashSet<Expression> conjuncts, ArrayList<FromClause> leafFromClauses) {
// TODO: IMPLEMENT
}
/**
* This helper method performs the first step of the dynamic programming
* process to generate an optimal join plan, by generating a plan for every
* leaf from-clause identified from analyzing the query. Leaf plans are
* usually very simple; they are built either from base-tables or
* <tt>SELECT</tt> subqueries. The most complex detail is that any
* conjuncts in the query that can be evaluated solely against a particular
* leaf plan-node will be associated with the plan node. <em>This is a
* heuristic</em> that usually produces good plans (and certainly will for
* the current state of the database), but could easily interfere with
* indexes or other plan optimizations.
*
* @param leafFromClauses the collection of from-clauses found in the query
*
* @param conjuncts the collection of conjuncts that can be applied at this
* level
*
* @return a collection of {@link JoinComponent} object containing the plans
* and other details for each leaf from-clause
*/
private ArrayList<JoinComponent> generateLeafJoinComponents(
Collection<FromClause> leafFromClauses, Collection<Expression> conjuncts) {
// Create a subplan for every single leaf FROM-clause, and prepare the
// leaf-plan.
ArrayList<JoinComponent> leafComponents = new ArrayList<>();
for (FromClause leafClause : leafFromClauses) {
HashSet<Expression> leafConjuncts = new HashSet<>();
PlanNode leafPlan =
makeLeafPlan(leafClause, conjuncts, leafConjuncts);
JoinComponent leaf = new JoinComponent(leafPlan, leafConjuncts);
leafComponents.add(leaf);
}
return leafComponents;
}
/**
* Constructs a plan tree for evaluating the specified from-clause.
* TODO: COMPLETE THE DOCUMENTATION
*
* @param fromClause the select nodes that need to be joined.
*
* @param conjuncts additional conjuncts that can be applied when
* constructing the from-clause plan.
*
* @param leafConjuncts this is an output-parameter. Any conjuncts
* applied in this plan from the <tt>conjuncts</tt> collection
* should be added to this out-param.
*
* @return a plan tree for evaluating the specified from-clause
*
* @throws IllegalArgumentException if the specified from-clause is a join
* expression that isn't an outer join, or has some other
* unrecognized type.
*/
private PlanNode makeLeafPlan(FromClause fromClause,
Collection<Expression> conjuncts, HashSet<Expression> leafConjuncts) {
// TODO: IMPLEMENT.
// If you apply any conjuncts then make sure to add them to the
// leafConjuncts collection.
//
// Don't forget that all from-clauses can specify an alias.
//
// Concentrate on properly handling cases other than outer
// joins first, then focus on outer joins once you have the
// typical cases supported.
return null;
}
/**
* This helper method builds up a full join-plan using a dynamic programming
* approach. The implementation maintains a collection of optimal
* intermediate plans that join <em>n</em> of the leaf nodes, each with its
* own associated cost, and then uses that collection to generate a new
* collection of optimal intermediate plans that join <em>n+1</em> of the
* leaf nodes. This process completes when all leaf plans are joined
* together; there will be <em>one</em> plan, and it will be the optimal
* join plan (as far as our limited estimates can determine, anyway).
*
* @param leafComponents the collection of leaf join-components, generated
* by the {@link #generateLeafJoinComponents} method.
*
* @param conjuncts the collection of all conjuncts found in the query
*
* @return a single {@link JoinComponent} object that joins all leaf
* components together in an optimal way.
*/
private JoinComponent generateOptimalJoin(
ArrayList<JoinComponent> leafComponents, Set<Expression> conjuncts) {
// This object maps a collection of leaf-plans (represented as a
// hash-set) to the optimal join-plan for that collection of leaf plans.
//
// This collection starts out only containing the leaf plans themselves,
// and on each iteration of the loop below, join-plans are grown by one
// leaf. For example:
// * In the first iteration, all plans joining 2 leaves are created.
// * In the second iteration, all plans joining 3 leaves are created.
// * etc.
// At the end, the collection will contain ONE entry, which is the
// optimal way to join all N leaves. Go Go Gadget Dynamic Programming!
HashMap<HashSet<PlanNode>, JoinComponent> joinPlans = new HashMap<>();
// Initially populate joinPlans with just the N leaf plans.
for (JoinComponent leaf : leafComponents)
joinPlans.put(leaf.leavesUsed, leaf);
while (joinPlans.size() > 1) {
logger.debug("Current set of join-plans has " + joinPlans.size() +
" plans in it.");
// This is the set of "next plans" we will generate. Plans only
// get stored if they are the first plan that joins together the
// specified leaves, or if they are better than the current plan.
HashMap<HashSet<PlanNode>, JoinComponent> nextJoinPlans =
new HashMap<>();
// TODO: IMPLEMENT THE CODE THAT GENERATES OPTIMAL PLANS THAT
// JOIN N + 1 LEAVES
// Now that we have generated all plans joining N leaves, time to
// create all plans joining N + 1 leaves.
joinPlans = nextJoinPlans;
}
// At this point, the set of join plans should only contain one plan,
// and it should be the optimal plan.
assert joinPlans.size() == 1 : "There can be only one optimal join plan!";
return joinPlans.values().iterator().next();
}
/**
* Constructs a simple select plan that reads directly from a table, with
* an optional predicate for selecting rows.
* <p>
* While this method can be used for building up larger <tt>SELECT</tt>
* queries, the returned plan is also suitable for use in <tt>UPDATE</tt>
* and <tt>DELETE</tt> command evaluation. In these cases, the plan must
* only generate tuples of type {@link edu.caltech.nanodb.storage.PageTuple},
* so that the command can modify or delete the actual tuple in the file's
* page data.
*
* @param tableName The name of the table that is being selected from.
*
* @param predicate An optional selection predicate, or {@code null} if
* no filtering is desired.
*
* @return A new plan-node for evaluating the select operation.
*/
public SelectNode makeSimpleSelect(String tableName, Expression predicate,
List<SelectClause> enclosingSelects) {
if (tableName == null)
throw new IllegalArgumentException("tableName cannot be null");
if (enclosingSelects != null) {
// If there are enclosing selects, this subquery's predicate may
// reference an outer query's value, but we don't detect that here.
// Therefore we will probably fail with an unrecognized column
// reference.
logger.warn("Currently we are not clever enough to detect " +
"correlated subqueries, so expect things are about to break...");
}
// Open the table.
TableInfo tableInfo = storageManager.getTableManager().openTable(tableName);
// Make a SelectNode to read rows from the table, with the specified
// predicate.
SelectNode selectNode = new FileScanNode(tableInfo, predicate);
selectNode.prepare();
return selectNode;
}
}