Commit 4eefbbc0 authored by Donald H. (Donnie) Pinkston, III's avatar Donald H. (Donnie) Pinkston, III
Browse files

Initial version of CostBasedJoinPlanner for HW4

parent c7b4e8f9
Pipeline #7105 failed with stages
Showing with 426 additions and 0 deletions
+426 -0
package edu.caltech.nanodb.queryeval;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import edu.caltech.nanodb.expressions.Expression;
import edu.caltech.nanodb.plannodes.FileScanNode;
import edu.caltech.nanodb.plannodes.PlanNode;
import edu.caltech.nanodb.plannodes.SelectNode;
import edu.caltech.nanodb.queryast.FromClause;
import edu.caltech.nanodb.queryast.SelectClause;
import edu.caltech.nanodb.relations.TableInfo;
import edu.caltech.nanodb.storage.StorageManager;
/**
* This planner implementation uses dynamic programming to devise an optimal
* join strategy for the query. As always, queries are optimized in units of
* <tt>SELECT</tt>-<tt>FROM</tt>-<tt>WHERE</tt> subqueries; optimizations
* don't currently span multiple subqueries.
*/
public class CostBasedJoinPlanner implements Planner {
/** A logging object for reporting anything interesting that happens. */
private static Logger logger = LogManager.getLogger(
CostBasedJoinPlanner.class);
/** The storage manager used during query planning. */
protected StorageManager storageManager;
/** Sets the server to be used during query planning. */
public void setStorageManager(StorageManager storageManager) {
if (storageManager == null)
throw new IllegalArgumentException("storageManager cannot be null");
this.storageManager = storageManager;
}
/**
* This helper class is used to keep track of one "join component" in the
* dynamic programming algorithm. A join component is simply a query plan
* for joining one or more leaves of the query.
* <p>
* In this context, a "leaf" may either be a base table or a subquery in
* the <tt>FROM</tt>-clause of the query. However, the planner will
* attempt to push conjuncts down the plan as far as possible, so even if
* a leaf is a base table, the plan may be a bit more complex than just a
* single file-scan.
*/
private static class JoinComponent {
/**
* This is the join plan itself, that joins together all leaves
* specified in the {@link #leavesUsed} field.
*/
public PlanNode joinPlan;
/**
* This field specifies the collection of leaf-plans that are joined by
* the plan in this join-component.
*/
public HashSet<PlanNode> leavesUsed;
/**
* This field specifies the collection of all conjuncts use by this join
* plan. It allows us to easily determine what join conjuncts still
* remain to be incorporated into the query.
*/
public HashSet<Expression> conjunctsUsed;
/**
* Constructs a new instance for a <em>leaf node</em>. It should not
* be used for join-plans that join together two or more leaves. This
* constructor simply adds the leaf-plan into the {@link #leavesUsed}
* collection.
*
* @param leafPlan the query plan for this leaf of the query.
*
* @param conjunctsUsed the set of conjuncts used by the leaf plan.
* This may be an empty set if no conjuncts apply solely to
* this leaf, or it may be nonempty if some conjuncts apply
* solely to this leaf.
*/
public JoinComponent(PlanNode leafPlan, HashSet<Expression> conjunctsUsed) {
leavesUsed = new HashSet<>();
leavesUsed.add(leafPlan);
joinPlan = leafPlan;
this.conjunctsUsed = conjunctsUsed;
}
/**
* Constructs a new instance for a <em>non-leaf node</em>. It should
* not be used for leaf plans!
*
* @param joinPlan the query plan that joins together all leaves
* specified in the <tt>leavesUsed</tt> argument.
*
* @param leavesUsed the set of two or more leaf plans that are joined
* together by the join plan.
*
* @param conjunctsUsed the set of conjuncts used by the join plan.
* Obviously, it is expected that all conjuncts specified here
* can actually be evaluated against the join plan.
*/
public JoinComponent(PlanNode joinPlan, HashSet<PlanNode> leavesUsed,
HashSet<Expression> conjunctsUsed) {
this.joinPlan = joinPlan;
this.leavesUsed = leavesUsed;
this.conjunctsUsed = conjunctsUsed;
}
}
/**
* Returns the root of a plan tree suitable for executing the specified
* query.
*
* @param selClause an object describing the query to be performed
*
* @return a plan tree for executing the specified query
*/
public PlanNode makePlan(SelectClause selClause,
List<SelectClause> enclosingSelects) {
// TODO: Implement!
//
// This is a very rough sketch of how this function will work,
// focusing mainly on join planning:
//
// 1) Pull out the top-level conjuncts from the FROM and WHERE
// clauses on the query, since we will handle them in special ways
// if we have outer joins.
//
// 2) Create an optimal join plan from the top-level from-clause and
// the top-level conjuncts.
//
// 3) If there are any unused conjuncts, determine how to handle them.
//
// 4) Create a project plan-node if necessary.
//
// 5) Handle other clauses such as ORDER BY, LIMIT/OFFSET, etc.
//
// Supporting other query features, such as grouping/aggregation,
// various kinds of subqueries, queries without a FROM clause, etc.,
// can all be incorporated into this sketch relatively easily.
return null;
}
/**
* Given the top-level {@code FromClause} for a SELECT-FROM-WHERE block,
* this helper generates an optimal join plan for the {@code FromClause}.
*
* @param fromClause the top-level {@code FromClause} of a
* SELECT-FROM-WHERE block.
* @param extraConjuncts any extra conjuncts (e.g. from the WHERE clause,
* or HAVING clause)
* @return a {@code JoinComponent} object that represents the optimal plan
* corresponding to the FROM-clause
*/
private JoinComponent makeJoinPlan(FromClause fromClause,
Collection<Expression> extraConjuncts) {
// These variables receive the leaf-clauses and join conjuncts found
// from scanning the sub-clauses. Initially, we put the extra conjuncts
// into the collection of conjuncts.
HashSet<Expression> conjuncts = new HashSet<>();
ArrayList<FromClause> leafFromClauses = new ArrayList<>();
collectDetails(fromClause, conjuncts, leafFromClauses);
logger.debug("Making join-plan for " + fromClause);
logger.debug(" Collected conjuncts: " + conjuncts);
logger.debug(" Collected FROM-clauses: " + leafFromClauses);
logger.debug(" Extra conjuncts: " + extraConjuncts);
if (extraConjuncts != null)
conjuncts.addAll(extraConjuncts);
// Make a read-only set of the input conjuncts, to avoid bugs due to
// unintended side-effects.
Set<Expression> roConjuncts = Collections.unmodifiableSet(conjuncts);
// Create a subplan for every single leaf FROM-clause, and prepare the
// leaf-plan.
logger.debug("Generating plans for all leaves");
ArrayList<JoinComponent> leafComponents = generateLeafJoinComponents(
leafFromClauses, roConjuncts);
// Print out the results, for debugging purposes.
if (logger.isDebugEnabled()) {
for (JoinComponent leaf : leafComponents) {
logger.debug(" Leaf plan:\n" +
PlanNode.printNodeTreeToString(leaf.joinPlan, true));
}
}
// Build up the full query-plan using a dynamic programming approach.
JoinComponent optimalJoin =
generateOptimalJoin(leafComponents, roConjuncts);
PlanNode plan = optimalJoin.joinPlan;
logger.info("Optimal join plan generated:\n" +
PlanNode.printNodeTreeToString(plan, true));
return optimalJoin;
}
/**
* This helper method pulls the essential details for join optimization
* out of a <tt>FROM</tt> clause.
*
* TODO: FILL IN DETAILS.
*
* @param fromClause the from-clause to collect details from
*
* @param conjuncts the collection to add all conjuncts to
*
* @param leafFromClauses the collection to add all leaf from-clauses to
*/
private void collectDetails(FromClause fromClause,
HashSet<Expression> conjuncts, ArrayList<FromClause> leafFromClauses) {
// TODO: IMPLEMENT
}
/**
* This helper method performs the first step of the dynamic programming
* process to generate an optimal join plan, by generating a plan for every
* leaf from-clause identified from analyzing the query. Leaf plans are
* usually very simple; they are built either from base-tables or
* <tt>SELECT</tt> subqueries. The most complex detail is that any
* conjuncts in the query that can be evaluated solely against a particular
* leaf plan-node will be associated with the plan node. <em>This is a
* heuristic</em> that usually produces good plans (and certainly will for
* the current state of the database), but could easily interfere with
* indexes or other plan optimizations.
*
* @param leafFromClauses the collection of from-clauses found in the query
*
* @param conjuncts the collection of conjuncts that can be applied at this
* level
*
* @return a collection of {@link JoinComponent} object containing the plans
* and other details for each leaf from-clause
*/
private ArrayList<JoinComponent> generateLeafJoinComponents(
Collection<FromClause> leafFromClauses, Collection<Expression> conjuncts) {
// Create a subplan for every single leaf FROM-clause, and prepare the
// leaf-plan.
ArrayList<JoinComponent> leafComponents = new ArrayList<>();
for (FromClause leafClause : leafFromClauses) {
HashSet<Expression> leafConjuncts = new HashSet<>();
PlanNode leafPlan =
makeLeafPlan(leafClause, conjuncts, leafConjuncts);
JoinComponent leaf = new JoinComponent(leafPlan, leafConjuncts);
leafComponents.add(leaf);
}
return leafComponents;
}
/**
* Constructs a plan tree for evaluating the specified from-clause.
* TODO: COMPLETE THE DOCUMENTATION
*
* @param fromClause the select nodes that need to be joined.
*
* @param conjuncts additional conjuncts that can be applied when
* constructing the from-clause plan.
*
* @param leafConjuncts this is an output-parameter. Any conjuncts
* applied in this plan from the <tt>conjuncts</tt> collection
* should be added to this out-param.
*
* @return a plan tree for evaluating the specified from-clause
*
* @throws IllegalArgumentException if the specified from-clause is a join
* expression that isn't an outer join, or has some other
* unrecognized type.
*/
private PlanNode makeLeafPlan(FromClause fromClause,
Collection<Expression> conjuncts, HashSet<Expression> leafConjuncts) {
// TODO: IMPLEMENT.
// If you apply any conjuncts then make sure to add them to the
// leafConjuncts collection.
//
// Don't forget that all from-clauses can specify an alias.
//
// Concentrate on properly handling cases other than outer
// joins first, then focus on outer joins once you have the
// typical cases supported.
return null;
}
/**
* This helper method builds up a full join-plan using a dynamic programming
* approach. The implementation maintains a collection of optimal
* intermediate plans that join <em>n</em> of the leaf nodes, each with its
* own associated cost, and then uses that collection to generate a new
* collection of optimal intermediate plans that join <em>n+1</em> of the
* leaf nodes. This process completes when all leaf plans are joined
* together; there will be <em>one</em> plan, and it will be the optimal
* join plan (as far as our limited estimates can determine, anyway).
*
* @param leafComponents the collection of leaf join-components, generated
* by the {@link #generateLeafJoinComponents} method.
*
* @param conjuncts the collection of all conjuncts found in the query
*
* @return a single {@link JoinComponent} object that joins all leaf
* components together in an optimal way.
*/
private JoinComponent generateOptimalJoin(
ArrayList<JoinComponent> leafComponents, Set<Expression> conjuncts) {
// This object maps a collection of leaf-plans (represented as a
// hash-set) to the optimal join-plan for that collection of leaf plans.
//
// This collection starts out only containing the leaf plans themselves,
// and on each iteration of the loop below, join-plans are grown by one
// leaf. For example:
// * In the first iteration, all plans joining 2 leaves are created.
// * In the second iteration, all plans joining 3 leaves are created.
// * etc.
// At the end, the collection will contain ONE entry, which is the
// optimal way to join all N leaves. Go Go Gadget Dynamic Programming!
HashMap<HashSet<PlanNode>, JoinComponent> joinPlans = new HashMap<>();
// Initially populate joinPlans with just the N leaf plans.
for (JoinComponent leaf : leafComponents)
joinPlans.put(leaf.leavesUsed, leaf);
while (joinPlans.size() > 1) {
logger.debug("Current set of join-plans has " + joinPlans.size() +
" plans in it.");
// This is the set of "next plans" we will generate. Plans only
// get stored if they are the first plan that joins together the
// specified leaves, or if they are better than the current plan.
HashMap<HashSet<PlanNode>, JoinComponent> nextJoinPlans =
new HashMap<>();
// TODO: IMPLEMENT THE CODE THAT GENERATES OPTIMAL PLANS THAT
// JOIN N + 1 LEAVES
// Now that we have generated all plans joining N leaves, time to
// create all plans joining N + 1 leaves.
joinPlans = nextJoinPlans;
}
// At this point, the set of join plans should only contain one plan,
// and it should be the optimal plan.
assert joinPlans.size() == 1 : "There can be only one optimal join plan!";
return joinPlans.values().iterator().next();
}
/**
* Constructs a simple select plan that reads directly from a table, with
* an optional predicate for selecting rows.
* <p>
* While this method can be used for building up larger <tt>SELECT</tt>
* queries, the returned plan is also suitable for use in <tt>UPDATE</tt>
* and <tt>DELETE</tt> command evaluation. In these cases, the plan must
* only generate tuples of type {@link edu.caltech.nanodb.storage.PageTuple},
* so that the command can modify or delete the actual tuple in the file's
* page data.
*
* @param tableName The name of the table that is being selected from.
*
* @param predicate An optional selection predicate, or {@code null} if
* no filtering is desired.
*
* @return A new plan-node for evaluating the select operation.
*/
public SelectNode makeSimpleSelect(String tableName, Expression predicate,
List<SelectClause> enclosingSelects) {
if (tableName == null)
throw new IllegalArgumentException("tableName cannot be null");
if (enclosingSelects != null) {
// If there are enclosing selects, this subquery's predicate may
// reference an outer query's value, but we don't detect that here.
// Therefore we will probably fail with an unrecognized column
// reference.
logger.warn("Currently we are not clever enough to detect " +
"correlated subqueries, so expect things are about to break...");
}
// Open the table.
TableInfo tableInfo = storageManager.getTableManager().openTable(tableName);
// Make a SelectNode to read rows from the table, with the specified
// predicate.
SelectNode selectNode = new FileScanNode(tableInfo, predicate);
selectNode.prepare();
return selectNode;
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment