Commit 4eefbbc0 authored by Donald H. (Donnie) Pinkston, III
Browse files

Initial version of CostBasedJoinPlanner for HW4

parent c7b4e8f9
No related merge requests found
Pipeline #7105 failed with stages
Showing with 426 additions and 0 deletions
+426 -0
package edu.caltech.nanodb.queryeval;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import edu.caltech.nanodb.expressions.Expression;
import edu.caltech.nanodb.plannodes.FileScanNode;
import edu.caltech.nanodb.plannodes.PlanNode;
import edu.caltech.nanodb.plannodes.SelectNode;
import edu.caltech.nanodb.queryast.FromClause;
import edu.caltech.nanodb.queryast.SelectClause;
import edu.caltech.nanodb.relations.TableInfo;
import edu.caltech.nanodb.storage.StorageManager;
/**
* This planner implementation uses dynamic programming to devise an optimal
* join strategy for the query. As always, queries are optimized in units of
* <tt>SELECT</tt>-<tt>FROM</tt>-<tt>WHERE</tt> subqueries; optimizations
* don't currently span multiple subqueries.
*/
public class CostBasedJoinPlanner implements Planner {
/** A logging object for reporting anything interesting that happens. */
private static final Logger logger = LogManager.getLogger(
    CostBasedJoinPlanner.class);

/**
 * The storage manager used during query planning.  The field has no
 * initializer, so it stays {@code null} until a value is supplied, e.g.
 * through {@link #setStorageManager}.
 */
protected StorageManager storageManager;
/**
 * Supplies the storage manager that this planner uses during query
 * planning.
 *
 * @param storageManager the storage manager to use; must not be
 *        {@code null}
 *
 * @throws IllegalArgumentException if {@code storageManager} is
 *         {@code null}
 */
public void setStorageManager(StorageManager storageManager) {
    // Reject a missing storage manager up front rather than failing later
    // in the middle of planning.
    if (storageManager == null) {
        throw new IllegalArgumentException("storageManager cannot be null");
    }

    this.storageManager = storageManager;
}
/**
 * This helper class is used to keep track of one "join component" in the
 * dynamic programming algorithm.  A join component is simply a query plan
 * for joining one or more leaves of the query.
 * <p>
 * In this context, a "leaf" may either be a base table or a subquery in
 * the <tt>FROM</tt>-clause of the query.  However, the planner will
 * attempt to push conjuncts down the plan as far as possible, so even if
 * a leaf is a base table, the plan may be a bit more complex than just a
 * single file-scan.
 * <p>
 * The three fields are assigned exactly once, in the constructors, and are
 * therefore declared {@code final}.  The collections they refer to are
 * <em>not</em> copied, so they are shared with whoever supplied them.
 */
private static class JoinComponent {
    /**
     * This is the join plan itself, that joins together all leaves
     * specified in the {@link #leavesUsed} field.
     */
    public final PlanNode joinPlan;

    /**
     * This field specifies the collection of leaf-plans that are joined by
     * the plan in this join-component.
     */
    public final HashSet<PlanNode> leavesUsed;

    /**
     * This field specifies the collection of all conjuncts used by this
     * join plan.  It allows us to easily determine what join conjuncts
     * still remain to be incorporated into the query.
     */
    public final HashSet<Expression> conjunctsUsed;

    /**
     * Constructs a new instance for a <em>leaf node</em>.  It should not
     * be used for join-plans that join together two or more leaves.  This
     * constructor creates a fresh {@link #leavesUsed} set containing only
     * the leaf-plan itself.
     *
     * @param leafPlan the query plan for this leaf of the query.
     *
     * @param conjunctsUsed the set of conjuncts used by the leaf plan.
     *        This may be an empty set if no conjuncts apply solely to
     *        this leaf, or it may be nonempty if some conjuncts apply
     *        solely to this leaf.  The set is stored by reference.
     */
    public JoinComponent(PlanNode leafPlan, HashSet<Expression> conjunctsUsed) {
        this.joinPlan = leafPlan;
        this.leavesUsed = new HashSet<>();
        this.leavesUsed.add(leafPlan);
        this.conjunctsUsed = conjunctsUsed;
    }

    /**
     * Constructs a new instance for a <em>non-leaf node</em>.  It should
     * not be used for leaf plans!
     *
     * @param joinPlan the query plan that joins together all leaves
     *        specified in the <tt>leavesUsed</tt> argument.
     *
     * @param leavesUsed the set of two or more leaf plans that are joined
     *        together by the join plan.  The set is stored by reference.
     *
     * @param conjunctsUsed the set of conjuncts used by the join plan.
     *        Obviously, it is expected that all conjuncts specified here
     *        can actually be evaluated against the join plan.  The set is
     *        stored by reference.
     */
    public JoinComponent(PlanNode joinPlan, HashSet<PlanNode> leavesUsed,
                         HashSet<Expression> conjunctsUsed) {
        this.joinPlan = joinPlan;
        this.leavesUsed = leavesUsed;
        this.conjunctsUsed = conjunctsUsed;
    }
}
/**
 * Returns the root of a plan tree suitable for executing the specified
 * query.
 * <p>
 * NOTE: this method is not implemented yet.  It currently performs no
 * planning at all and always returns {@code null}; the comments in the
 * body sketch the intended steps.
 *
 * @param selClause an object describing the query to be performed
 *
 * @param enclosingSelects presumably the {@code SELECT} clauses that
 *        enclose {@code selClause} when it is a subquery (TODO: confirm
 *        against the {@code Planner} interface).  The current stub
 *        ignores this argument.
 *
 * @return a plan tree for executing the specified query; the current stub
 *         always returns {@code null}
 */
public PlanNode makePlan(SelectClause selClause,
List<SelectClause> enclosingSelects) {
// TODO: Implement!
//
// This is a very rough sketch of how this function will work,
// focusing mainly on join planning:
//
// 1) Pull out the top-level conjuncts from the FROM and WHERE
// clauses on the query, since we will handle them in special ways
// if we have outer joins.
//
// 2) Create an optimal join plan from the top-level from-clause and
// the top-level conjuncts.
//
// 3) If there are any unused conjuncts, determine how to handle them.
//
// 4) Create a project plan-node if necessary.
//
// 5) Handle other clauses such as ORDER BY, LIMIT/OFFSET, etc.
//
// Supporting other query features, such as grouping/aggregation,
// various kinds of subqueries, queries without a FROM clause, etc.,
// can all be incorporated into this sketch relatively easily.

// Placeholder result until the steps above are implemented.
return null;
}
/**
 * Given the top-level {@code FromClause} for a SELECT-FROM-WHERE block,
 * this helper generates an optimal join plan for the {@code FromClause}.
 * <p>
 * The method collects the conjuncts and leaf from-clauses out of
 * {@code fromClause}, merges in the extra conjuncts, builds one join
 * component per leaf, and then hands the leaves to
 * {@link #generateOptimalJoin} to combine them.
 *
 * @param fromClause the top-level {@code FromClause} of a
 *        SELECT-FROM-WHERE block.
 * @param extraConjuncts any extra conjuncts (e.g. from the WHERE clause,
 *        or HAVING clause).  May be {@code null}, in which case no extra
 *        conjuncts are added.
 * @return a {@code JoinComponent} object that represents the optimal plan
 *         corresponding to the FROM-clause
 */
private JoinComponent makeJoinPlan(FromClause fromClause,
    Collection<Expression> extraConjuncts) {

    // These collections receive the join conjuncts and the leaf clauses
    // found from scanning the sub-clauses of the FROM-clause.
    HashSet<Expression> conjuncts = new HashSet<>();
    ArrayList<FromClause> leafFromClauses = new ArrayList<>();

    collectDetails(fromClause, conjuncts, leafFromClauses);

    // Guarded so the message strings are only built when they will
    // actually be logged.
    if (logger.isDebugEnabled()) {
        logger.debug("Making join-plan for " + fromClause);
        logger.debug(" Collected conjuncts: " + conjuncts);
        logger.debug(" Collected FROM-clauses: " + leafFromClauses);
        logger.debug(" Extra conjuncts: " + extraConjuncts);
    }

    // The extra conjuncts are merged in only after the logging above, so
    // that the debug output reports the collected and the extra conjuncts
    // separately.
    if (extraConjuncts != null) {
        conjuncts.addAll(extraConjuncts);
    }

    // Make a read-only set of the input conjuncts, to avoid bugs due to
    // unintended side-effects.
    Set<Expression> roConjuncts = Collections.unmodifiableSet(conjuncts);

    // Create a subplan for every single leaf FROM-clause, and prepare the
    // leaf-plan.
    logger.debug("Generating plans for all leaves");
    ArrayList<JoinComponent> leafComponents = generateLeafJoinComponents(
        leafFromClauses, roConjuncts);

    // Print out the results, for debugging purposes.
    if (logger.isDebugEnabled()) {
        for (JoinComponent leaf : leafComponents) {
            logger.debug(" Leaf plan:\n" +
                PlanNode.printNodeTreeToString(leaf.joinPlan, true));
        }
    }

    // Build up the full query-plan using a dynamic programming approach.
    JoinComponent optimalJoin =
        generateOptimalJoin(leafComponents, roConjuncts);

    // Rendering the whole plan tree is only worth doing when the message
    // will be emitted, mirroring the guard on the leaf-plan logging.
    if (logger.isInfoEnabled()) {
        logger.info("Optimal join plan generated:\n" +
            PlanNode.printNodeTreeToString(optimalJoin.joinPlan, true));
    }

    return optimalJoin;
}
/**
 * This helper method pulls the essential details for join optimization
 * out of a <tt>FROM</tt> clause.
 * <p>
 * NOTE: this method is not implemented yet.  Its body is currently empty,
 * so neither {@code conjuncts} nor {@code leafFromClauses} is modified.
 *
 * TODO: FILL IN DETAILS.
 *
 * @param fromClause the from-clause to collect details from
 *
 * @param conjuncts the collection to add all conjuncts to (an output
 *        parameter)
 *
 * @param leafFromClauses the collection to add all leaf from-clauses to
 *        (an output parameter)
 */
private void collectDetails(FromClause fromClause,
HashSet<Expression> conjuncts, ArrayList<FromClause> leafFromClauses) {
// TODO: IMPLEMENT
}
/**
 * Performs the first step of the dynamic-programming join optimization:
 * one {@link JoinComponent} is produced for every leaf from-clause that
 * was identified in the query.
 * <p>
 * Each leaf plan is built by {@link #makeLeafPlan}, which also reports
 * the conjuncts it applied to that leaf.  Those conjuncts are recorded in
 * the resulting join component.  Associating conjuncts with individual
 * leaves <em>is a heuristic</em>; it usually yields good plans but could
 * interfere with indexes or other plan optimizations.
 *
 * @param leafFromClauses the collection of from-clauses found in the query
 *
 * @param conjuncts the collection of conjuncts that can be applied at this
 *        level
 *
 * @return a list of {@link JoinComponent} objects, one per leaf
 *         from-clause, in the iteration order of {@code leafFromClauses}
 */
private ArrayList<JoinComponent> generateLeafJoinComponents(
    Collection<FromClause> leafFromClauses, Collection<Expression> conjuncts) {

    ArrayList<JoinComponent> components = new ArrayList<>();

    for (FromClause clause : leafFromClauses) {
        // Receives whichever conjuncts the leaf plan ends up applying.
        HashSet<Expression> appliedConjuncts = new HashSet<>();

        PlanNode plan = makeLeafPlan(clause, conjuncts, appliedConjuncts);
        components.add(new JoinComponent(plan, appliedConjuncts));
    }

    return components;
}
/**
 * Constructs a plan tree for evaluating the specified from-clause.
 * <p>
 * NOTE: this method is not implemented yet.  It currently ignores all of
 * its arguments, adds nothing to {@code leafConjuncts}, and always returns
 * {@code null}.
 *
 * TODO: COMPLETE THE DOCUMENTATION
 *
 * @param fromClause the select nodes that need to be joined.
 *
 * @param conjuncts additional conjuncts that can be applied when
 *        constructing the from-clause plan.
 *
 * @param leafConjuncts this is an output-parameter.  Any conjuncts
 *        applied in this plan from the <tt>conjuncts</tt> collection
 *        should be added to this out-param.
 *
 * @return a plan tree for evaluating the specified from-clause; the
 *         current stub always returns {@code null}
 *
 * @throws IllegalArgumentException (intended contract; the current stub
 *         never throws) if the specified from-clause is a join
 *         expression that isn't an outer join, or has some other
 *         unrecognized type.
 */
private PlanNode makeLeafPlan(FromClause fromClause,
Collection<Expression> conjuncts, HashSet<Expression> leafConjuncts) {
// TODO: IMPLEMENT.
// If you apply any conjuncts then make sure to add them to the
// leafConjuncts collection.
//
// Don't forget that all from-clauses can specify an alias.
//
// Concentrate on properly handling cases other than outer
// joins first, then focus on outer joins once you have the
// typical cases supported.

// Placeholder result until the leaf-plan logic is implemented.
return null;
}
/**
 * This helper method builds up a full join-plan using a dynamic programming
 * approach.  The implementation maintains a collection of optimal
 * intermediate plans that join <em>n</em> of the leaf nodes, each with its
 * own associated cost, and then uses that collection to generate a new
 * collection of optimal intermediate plans that join <em>n+1</em> of the
 * leaf nodes.  This process completes when all leaf plans are joined
 * together; there will be <em>one</em> plan, and it will be the optimal
 * join plan (as far as our limited estimates can determine, anyway).
 * <p>
 * NOTE: the step that generates the <em>n+1</em>-leaf plans is still a
 * TODO.  Until it is written, each pass of the loop replaces the current
 * plans with an empty collection, so any input with more than one leaf
 * ends in the {@code IllegalStateException} described below.
 *
 * @param leafComponents the collection of leaf join-components, generated
 *        by the {@link #generateLeafJoinComponents} method.  Must contain
 *        at least one component.
 *
 * @param conjuncts the collection of all conjuncts found in the query
 *
 * @return a single {@link JoinComponent} object that joins all leaf
 *         components together in an optimal way.
 *
 * @throws IllegalArgumentException if {@code leafComponents} is empty
 *
 * @throws IllegalStateException if the process does not end with exactly
 *         one join plan
 */
private JoinComponent generateOptimalJoin(
    ArrayList<JoinComponent> leafComponents, Set<Expression> conjuncts) {

    // With no leaves there is nothing to join.  Report that clearly here
    // rather than failing on an empty iterator at the end of the method.
    if (leafComponents.isEmpty()) {
        throw new IllegalArgumentException(
            "leafComponents must contain at least one leaf join-component");
    }

    // This object maps a collection of leaf-plans (represented as a
    // hash-set) to the optimal join-plan for that collection of leaf plans.
    //
    // This collection starts out only containing the leaf plans themselves,
    // and on each iteration of the loop below, join-plans are grown by one
    // leaf.  For example:
    //   * In the first iteration, all plans joining 2 leaves are created.
    //   * In the second iteration, all plans joining 3 leaves are created.
    //   * etc.
    // At the end, the collection will contain ONE entry, which is the
    // optimal way to join all N leaves.  Go Go Gadget Dynamic Programming!
    HashMap<HashSet<PlanNode>, JoinComponent> joinPlans = new HashMap<>();

    // Initially populate joinPlans with just the N leaf plans.
    for (JoinComponent leaf : leafComponents) {
        joinPlans.put(leaf.leavesUsed, leaf);
    }

    while (joinPlans.size() > 1) {
        logger.debug("Current set of join-plans has " + joinPlans.size() +
            " plans in it.");

        // This is the set of "next plans" we will generate.  Plans only
        // get stored if they are the first plan that joins together the
        // specified leaves, or if they are better than the current plan.
        HashMap<HashSet<PlanNode>, JoinComponent> nextJoinPlans =
            new HashMap<>();

        // TODO:  IMPLEMENT THE CODE THAT GENERATES OPTIMAL PLANS THAT
        //        JOIN N + 1 LEAVES

        // The plans joining N + 1 leaves become the input for the next
        // round of the loop.
        joinPlans = nextJoinPlans;
    }

    // At this point, the set of join plans should only contain one plan,
    // and it should be the optimal plan.  This is checked explicitly
    // because assertions are disabled unless the JVM is started with -ea.
    if (joinPlans.size() != 1) {
        throw new IllegalStateException(
            "There can be only one optimal join plan!  Found " +
            joinPlans.size() + " plans.");
    }

    return joinPlans.values().iterator().next();
}
/**
 * Constructs a simple select plan that reads directly from a table, with
 * an optional predicate for selecting rows.
 * <p>
 * While this method can be used for building up larger <tt>SELECT</tt>
 * queries, the returned plan is also suitable for use in <tt>UPDATE</tt>
 * and <tt>DELETE</tt> command evaluation.  In these cases, the plan must
 * only generate tuples of type {@link edu.caltech.nanodb.storage.PageTuple},
 * so that the command can modify or delete the actual tuple in the file's
 * page data.
 *
 * @param tableName The name of the table that is being selected from.
 *
 * @param predicate An optional selection predicate, or {@code null} if
 *        no filtering is desired.
 *
 * @param enclosingSelects The enclosing selects of this query, or
 *        {@code null} if there are none.  The list is only inspected to
 *        decide whether to log a warning: correlated subqueries are not
 *        detected here.
 *
 * @return A new plan-node for evaluating the select operation.  The node
 *         has already been prepared.
 *
 * @throws IllegalArgumentException if {@code tableName} is {@code null}
 */
public SelectNode makeSimpleSelect(String tableName, Expression predicate,
    List<SelectClause> enclosingSelects) {

    if (tableName == null) {
        throw new IllegalArgumentException("tableName cannot be null");
    }

    // An empty list means there are no enclosing selects, so it is treated
    // the same as null and does not trigger the warning.
    if (enclosingSelects != null && !enclosingSelects.isEmpty()) {
        // If there are enclosing selects, this subquery's predicate may
        // reference an outer query's value, but we don't detect that here.
        // Therefore we will probably fail with an unrecognized column
        // reference.
        logger.warn("Currently we are not clever enough to detect " +
            "correlated subqueries, so expect things are about to break...");
    }

    // Open the table.
    TableInfo tableInfo = storageManager.getTableManager().openTable(tableName);

    // Make a SelectNode to read rows from the table, with the specified
    // predicate.
    SelectNode selectNode = new FileScanNode(tableInfo, predicate);
    selectNode.prepare();
    return selectNode;
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment