Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
cs122-19wi
nanodb-base
Commits
4eefbbc0
Commit
4eefbbc0
authored
6 years ago
by
Donald H. (Donnie) Pinkston, III
Browse files
Options
Download
Email Patches
Plain Diff
Initial version of CostBasedJoinPlanner for HW4
parent
c7b4e8f9
Pipeline
#7105
failed with stages
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/main/java/edu/caltech/nanodb/queryeval/CostBasedJoinPlanner.java
+426
-0
...va/edu/caltech/nanodb/queryeval/CostBasedJoinPlanner.java
with
426 additions
and
0 deletions
+426
-0
src/main/java/edu/caltech/nanodb/queryeval/CostBasedJoinPlanner.java
0 → 100644
View file @
4eefbbc0
package
edu.caltech.nanodb.queryeval
;
import
java.util.ArrayList
;
import
java.util.Collection
;
import
java.util.Collections
;
import
java.util.HashMap
;
import
java.util.HashSet
;
import
java.util.List
;
import
java.util.Set
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
edu.caltech.nanodb.expressions.Expression
;
import
edu.caltech.nanodb.plannodes.FileScanNode
;
import
edu.caltech.nanodb.plannodes.PlanNode
;
import
edu.caltech.nanodb.plannodes.SelectNode
;
import
edu.caltech.nanodb.queryast.FromClause
;
import
edu.caltech.nanodb.queryast.SelectClause
;
import
edu.caltech.nanodb.relations.TableInfo
;
import
edu.caltech.nanodb.storage.StorageManager
;
/**
* This planner implementation uses dynamic programming to devise an optimal
* join strategy for the query. As always, queries are optimized in units of
* <tt>SELECT</tt>-<tt>FROM</tt>-<tt>WHERE</tt> subqueries; optimizations
* don't currently span multiple subqueries.
*/
public
class
CostBasedJoinPlanner
implements
Planner
{
/**
 * A logging object for reporting anything interesting that happens.  The
 * logger is shared by every instance of the planner and is never
 * reassigned, so it is declared {@code final}.
 */
private static final Logger logger =
    LogManager.getLogger(CostBasedJoinPlanner.class);

/**
 * The storage manager used during query planning.  It is assigned by
 * {@link #setStorageManager} and is used by {@link #makeSimpleSelect} to
 * open the table being scanned.
 */
protected StorageManager storageManager;
/**
 * Sets the storage manager to be used during query planning.
 *
 * @param storageManager the storage manager that the planner should use;
 *        must not be {@code null}
 *
 * @throws IllegalArgumentException if {@code storageManager} is
 *         {@code null}
 */
public void setStorageManager(StorageManager storageManager) {
    // Reject a missing storage manager up front, rather than failing later
    // when the planner first tries to open a table.
    if (storageManager == null) {
        throw new IllegalArgumentException("storageManager cannot be null");
    }

    this.storageManager = storageManager;
}
/**
 * Bookkeeping record for one "join component" of the dynamic programming
 * algorithm:  a query plan that joins one or more leaves of the query,
 * along with the leaves it covers and the conjuncts it has consumed.
 * <p>
 * A "leaf" here is either a base table or a subquery in the
 * {@code FROM}-clause of the query.  Because the planner tries to push
 * conjuncts as far down the plan as it can, the plan for a base-table leaf
 * may be more than a bare file-scan.
 */
private static class JoinComponent {
    /**
     * The join plan itself; it joins together every leaf listed in
     * {@link #leavesUsed}.
     */
    public PlanNode joinPlan;

    /** The leaf-plans that are joined by {@link #joinPlan}. */
    public HashSet<PlanNode> leavesUsed;

    /**
     * Every conjunct used by this join plan.  Keeping these makes it easy
     * to work out which join conjuncts have yet to be incorporated into
     * the query.
     */
    public HashSet<Expression> conjunctsUsed;


    /**
     * Constructs a component for a <em>non-leaf node</em>.  Do not use
     * this for leaf plans.
     *
     * @param joinPlan the query plan that joins together all leaves
     *        specified in the {@code leavesUsed} argument.
     *
     * @param leavesUsed the set of two or more leaf plans that are joined
     *        together by the join plan.  The set is stored as-is, not
     *        copied.
     *
     * @param conjunctsUsed the set of conjuncts used by the join plan.
     *        All conjuncts given here are expected to be evaluable against
     *        the join plan.  The set is stored as-is, not copied.
     */
    public JoinComponent(PlanNode joinPlan, HashSet<PlanNode> leavesUsed,
                         HashSet<Expression> conjunctsUsed) {
        this.conjunctsUsed = conjunctsUsed;
        this.leavesUsed = leavesUsed;
        this.joinPlan = joinPlan;
    }


    /**
     * Constructs a component for a <em>leaf node</em>.  Do not use this
     * for plans that join two or more leaves.  The leaf-plan becomes both
     * the join plan and the sole member of {@link #leavesUsed}.
     *
     * @param leafPlan the query plan for this leaf of the query.
     *
     * @param conjunctsUsed the set of conjuncts used by the leaf plan.
     *        It is empty when no conjuncts apply solely to this leaf, and
     *        nonempty otherwise.  The set is stored as-is, not copied.
     */
    public JoinComponent(PlanNode leafPlan,
                         HashSet<Expression> conjunctsUsed) {
        // Start from a fresh, empty leaf-set, then record the single leaf.
        this(leafPlan, new HashSet<PlanNode>(), conjunctsUsed);
        leavesUsed.add(leafPlan);
    }
}
/**
 * Produces the root of a plan tree suitable for executing the specified
 * query.
 * <p>
 * <b>Not yet implemented:</b>  this version always returns {@code null}.
 *
 * @param selClause an object describing the query to be performed
 *
 * @param enclosingSelects presumably the {@code SELECT} clauses that
 *        enclose {@code selClause} when it is a subquery (TODO confirm
 *        against the {@code Planner} interface); not used yet
 *
 * @return a plan tree for executing the specified query; currently always
 *         {@code null}
 */
public PlanNode makePlan(SelectClause selClause,
                         List<SelectClause> enclosingSelects) {

    // TODO:  Implement!
    //
    // This is a very rough sketch of how this function will work,
    // focusing mainly on join planning:
    //
    // 1)  Pull out the top-level conjuncts from the FROM and WHERE
    //     clauses on the query, since we will handle them in special ways
    //     if we have outer joins.
    //
    // 2)  Create an optimal join plan from the top-level from-clause and
    //     the top-level conjuncts.
    //
    // 3)  If there are any unused conjuncts, determine how to handle them.
    //
    // 4)  Create a project plan-node if necessary.
    //
    // 5)  Handle other clauses such as ORDER BY, LIMIT/OFFSET, etc.
    //
    // Supporting other query features, such as grouping/aggregation,
    // various kinds of subqueries, queries without a FROM clause, etc.,
    // can all be incorporated into this sketch relatively easily.

    return null;
}
/**
 * Generates an optimal join plan for the top-level {@code FromClause} of a
 * SELECT-FROM-WHERE block.
 * <p>
 * The method gathers the conjuncts and leaf from-clauses out of
 * {@code fromClause}, merges in {@code extraConjuncts}, builds one
 * {@link JoinComponent} per leaf, and finally hands the leaves to
 * {@link #generateOptimalJoin} to be combined into a single plan.
 *
 * @param fromClause the top-level {@code FromClause} of a
 *        SELECT-FROM-WHERE block.
 *
 * @param extraConjuncts any extra conjuncts (e.g. from the WHERE clause,
 *        or HAVING clause); may be {@code null}, in which case no extra
 *        conjuncts are added
 *
 * @return a {@code JoinComponent} object that represents the optimal plan
 *         corresponding to the FROM-clause
 */
private JoinComponent makeJoinPlan(FromClause fromClause,
                                   Collection<Expression> extraConjuncts) {

    // These collections receive the join conjuncts and leaf-clauses found
    // from scanning the sub-clauses.  The extra conjuncts are merged in
    // afterward, once the collected details have been logged.
    HashSet<Expression> allConjuncts = new HashSet<>();
    ArrayList<FromClause> leafClauses = new ArrayList<>();
    collectDetails(fromClause, allConjuncts, leafClauses);

    logger.debug("Making join-plan for " + fromClause);
    logger.debug(" Collected conjuncts: " + allConjuncts);
    logger.debug(" Collected FROM-clauses: " + leafClauses);
    logger.debug(" Extra conjuncts: " + extraConjuncts);

    if (extraConjuncts != null) {
        allConjuncts.addAll(extraConjuncts);
    }

    // Hand out only a read-only view of the conjuncts from here on, to
    // avoid bugs due to unintended side-effects.
    Set<Expression> readOnlyConjuncts =
        Collections.unmodifiableSet(allConjuncts);

    // Step 1 of the dynamic programming:  a prepared subplan for every
    // leaf FROM-clause.
    logger.debug("Generating plans for all leaves");
    ArrayList<JoinComponent> leaves =
        generateLeafJoinComponents(leafClauses, readOnlyConjuncts);

    // Dump the leaf plans for debugging purposes.  The guard avoids
    // rendering the plan trees when debug logging is off.
    if (logger.isDebugEnabled()) {
        for (JoinComponent component : leaves) {
            logger.debug(" Leaf plan:\n" +
                PlanNode.printNodeTreeToString(component.joinPlan, true));
        }
    }

    // Step 2:  build up the full query-plan from the leaves.
    JoinComponent best = generateOptimalJoin(leaves, readOnlyConjuncts);

    logger.info("Optimal join plan generated:\n" +
        PlanNode.printNodeTreeToString(best.joinPlan, true));

    return best;
}
/**
 * Pulls the essential details for join optimization out of a
 * {@code FROM} clause, reporting them through the two collection
 * arguments.
 * <p>
 * <b>Not yet implemented:</b>  this version has an empty body, so both
 * collections are left exactly as they were passed in.
 *
 * TODO:  FILL IN DETAILS.
 *
 * @param fromClause the from-clause to collect details from
 *
 * @param conjuncts the collection to add all conjuncts to
 *
 * @param leafFromClauses the collection to add all leaf from-clauses to
 */
private void collectDetails(FromClause fromClause,
                            HashSet<Expression> conjuncts,
                            ArrayList<FromClause> leafFromClauses) {
    // TODO:  IMPLEMENT
}
/**
 * Performs the first step of the dynamic programming process:  it builds
 * one {@link JoinComponent} for every leaf from-clause identified from
 * analyzing the query.
 * <p>
 * Each leaf plan is produced by {@link #makeLeafPlan}, which also reports
 * which of the supplied conjuncts it applied; those conjuncts are recorded
 * on the resulting component.  Associating conjuncts with the leaf they can
 * be evaluated against is <em>a heuristic</em> that usually produces good
 * plans, but it could interfere with indexes or other plan optimizations.
 *
 * @param leafFromClauses the collection of from-clauses found in the query
 *
 * @param conjuncts the collection of conjuncts that can be applied at this
 *        level
 *
 * @return a list of {@link JoinComponent} objects, one per leaf
 *         from-clause, in the iteration order of {@code leafFromClauses}
 */
private ArrayList<JoinComponent> generateLeafJoinComponents(
    Collection<FromClause> leafFromClauses,
    Collection<Expression> conjuncts) {

    ArrayList<JoinComponent> components = new ArrayList<>();

    for (FromClause clause : leafFromClauses) {
        // makeLeafPlan() fills this set with the conjuncts it applied.
        HashSet<Expression> applied = new HashSet<>();
        PlanNode plan = makeLeafPlan(clause, conjuncts, applied);

        components.add(new JoinComponent(plan, applied));
    }

    return components;
}
/**
 * Constructs a plan tree for evaluating the specified leaf from-clause.
 * <p>
 * <b>Not yet implemented:</b>  this version always returns {@code null}
 * and never touches {@code leafConjuncts}.
 *
 * TODO:  COMPLETE THE DOCUMENTATION
 *
 * @param fromClause the leaf from-clause to build a plan for
 *
 * @param conjuncts additional conjuncts that can be applied when
 *        constructing the from-clause plan.
 *
 * @param leafConjuncts this is an output-parameter.  Any conjuncts
 *        applied in this plan from the {@code conjuncts} collection
 *        should be added to this out-param.
 *
 * @return a plan tree for evaluating the specified from-clause; currently
 *         always {@code null}
 *
 * @throws IllegalArgumentException intended for a from-clause that is a
 *         join expression that isn't an outer join, or that has some
 *         other unrecognized type.  The current stub never throws it.
 */
private PlanNode makeLeafPlan(FromClause fromClause,
                              Collection<Expression> conjuncts,
                              HashSet<Expression> leafConjuncts) {

    // TODO:  IMPLEMENT.
    //        If you apply any conjuncts then make sure to add them to the
    //        leafConjuncts collection.
    //
    //        Don't forget that all from-clauses can specify an alias.
    //
    //        Concentrate on properly handling cases other than outer
    //        joins first, then focus on outer joins once you have the
    //        typical cases supported.

    return null;
}
/**
 * This helper method builds up a full join-plan using a dynamic programming
 * approach.  The implementation maintains a collection of optimal
 * intermediate plans that join <em>n</em> of the leaf nodes, and then uses
 * that collection to generate a new collection of optimal intermediate
 * plans that join <em>n+1</em> of the leaf nodes.  This process completes
 * when all leaf plans are joined together; there will be <em>one</em> plan,
 * and it will be the optimal join plan (as far as our limited estimates can
 * determine, anyway).
 * <p>
 * <b>Note:</b>  the step that generates the <em>n+1</em>-leaf plans is not
 * yet implemented.  Until it is, any input with more than one leaf ends up
 * with no plan at all, which is reported as an
 * {@link IllegalStateException}.
 *
 * @param leafComponents the collection of leaf join-components, generated
 *        by the {@link #generateLeafJoinComponents} method.  Must contain
 *        at least one component.
 *
 * @param conjuncts the collection of all conjuncts found in the query
 *
 * @return a single {@link JoinComponent} object that joins all leaf
 *         components together in an optimal way.
 *
 * @throws IllegalArgumentException if {@code leafComponents} is
 *         {@code null} or empty
 *
 * @throws IllegalStateException if the planning loop finishes without
 *         exactly one join plan remaining
 */
private JoinComponent generateOptimalJoin(
    ArrayList<JoinComponent> leafComponents, Set<Expression> conjuncts) {

    // With no leaves there is nothing to join.  Fail with a clear message
    // instead of a bare NoSuchElementException at the end of the method.
    if (leafComponents == null || leafComponents.isEmpty()) {
        throw new IllegalArgumentException(
            "leafComponents must contain at least one leaf plan");
    }

    // This object maps a collection of leaf-plans (represented as a
    // hash-set) to the optimal join-plan for that collection of leaf plans.
    //
    // This collection starts out only containing the leaf plans themselves,
    // and on each iteration of the loop below, join-plans are grown by one
    // leaf.  For example:
    //   * In the first iteration, all plans joining 2 leaves are created.
    //   * In the second iteration, all plans joining 3 leaves are created.
    //   * etc.
    // At the end, the collection will contain ONE entry, which is the
    // optimal way to join all N leaves.  Go Go Gadget Dynamic Programming!
    HashMap<HashSet<PlanNode>, JoinComponent> joinPlans = new HashMap<>();

    // Initially populate joinPlans with just the N leaf plans.
    // NOTE(review):  the key is the mutable leavesUsed set, so it must not
    // be modified while it is in the map.
    for (JoinComponent leaf : leafComponents) {
        joinPlans.put(leaf.leavesUsed, leaf);
    }

    while (joinPlans.size() > 1) {
        logger.debug("Current set of join-plans has " + joinPlans.size() +
            " plans in it.");

        // This is the set of "next plans" we will generate.  Plans only
        // get stored if they are the first plan that joins together the
        // specified leaves, or if they are better than the current plan.
        HashMap<HashSet<PlanNode>, JoinComponent> nextJoinPlans =
            new HashMap<>();

        // TODO:  IMPLEMENT THE CODE THAT GENERATES OPTIMAL PLANS THAT
        //        JOIN N + 1 LEAVES

        // Now that we have generated all plans joining N leaves, time to
        // create all plans joining N + 1 leaves.
        joinPlans = nextJoinPlans;
    }

    // At this point, the set of join plans should only contain one plan,
    // and it should be the optimal plan.  The loop can only exit with zero
    // or one plans, so this check catches the case where a round produced
    // no plans at all.  It is an explicit check rather than an assert
    // because assertions are disabled unless the JVM is run with -ea.
    if (joinPlans.size() != 1) {
        throw new IllegalStateException(
            "Expected exactly one optimal join plan, but found " +
            joinPlans.size() + "; no plans were generated for the next " +
            "level of the join search");
    }

    return joinPlans.values().iterator().next();
}
/**
 * Constructs a simple select plan that reads directly from a table, with
 * an optional predicate for selecting rows.  The returned plan has already
 * been prepared.
 * <p>
 * While this method can be used for building up larger {@code SELECT}
 * queries, the returned plan is also suitable for use in {@code UPDATE}
 * and {@code DELETE} command evaluation.  In these cases, the plan must
 * only generate tuples of type {@link edu.caltech.nanodb.storage.PageTuple},
 * so that the command can modify or delete the actual tuple in the file's
 * page data.
 *
 * @param tableName The name of the table that is being selected from.
 *
 * @param predicate An optional selection predicate, or {@code null} if
 *        no filtering is desired.
 *
 * @param enclosingSelects Any selects enclosing this one, or {@code null}.
 *        A non-{@code null} value (even an empty list) only triggers a
 *        warning, because correlated subqueries are not detected here.
 *
 * @return A new plan-node for evaluating the select operation.
 *
 * @throws IllegalArgumentException if {@code tableName} is {@code null}
 */
public SelectNode makeSimpleSelect(String tableName, Expression predicate,
                                   List<SelectClause> enclosingSelects) {
    if (tableName == null) {
        throw new IllegalArgumentException("tableName cannot be null");
    }

    if (enclosingSelects != null) {
        // If there are enclosing selects, this subquery's predicate may
        // reference an outer query's value, but we don't detect that here.
        // Therefore we will probably fail with an unrecognized column
        // reference.
        logger.warn("Currently we are not clever enough to detect " +
            "correlated subqueries, so expect things are about to break...");
    }

    // Open the table, and make a file-scan over it that reads rows with
    // the specified predicate.
    SelectNode scan = new FileScanNode(
        storageManager.getTableManager().openTable(tableName), predicate);

    // Prepare the plan before handing it back to the caller.
    scan.prepare();
    return scan;
}
}
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment
Menu
Projects
Groups
Snippets
Help