Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
cs122-19wi
nanodb-base
Commits
4eefbbc0
Commit
4eefbbc0
authored
6 years ago
by
Donald H. (Donnie) Pinkston, III
Browse files
Options
Download
Email Patches
Plain Diff
Initial version of CostBasedJoinPlanner for HW4
parent
c7b4e8f9
master
No related merge requests found
Pipeline
#7105
failed with stages
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/main/java/edu/caltech/nanodb/queryeval/CostBasedJoinPlanner.java
+426
-0
...va/edu/caltech/nanodb/queryeval/CostBasedJoinPlanner.java
with
426 additions
and
0 deletions
+426
-0
src/main/java/edu/caltech/nanodb/queryeval/CostBasedJoinPlanner.java
0 → 100644
View file @
4eefbbc0
package
edu.caltech.nanodb.queryeval
;
import
java.util.ArrayList
;
import
java.util.Collection
;
import
java.util.Collections
;
import
java.util.HashMap
;
import
java.util.HashSet
;
import
java.util.List
;
import
java.util.Set
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
edu.caltech.nanodb.expressions.Expression
;
import
edu.caltech.nanodb.plannodes.FileScanNode
;
import
edu.caltech.nanodb.plannodes.PlanNode
;
import
edu.caltech.nanodb.plannodes.SelectNode
;
import
edu.caltech.nanodb.queryast.FromClause
;
import
edu.caltech.nanodb.queryast.SelectClause
;
import
edu.caltech.nanodb.relations.TableInfo
;
import
edu.caltech.nanodb.storage.StorageManager
;
/**
* This planner implementation uses dynamic programming to devise an optimal
* join strategy for the query. As always, queries are optimized in units of
* <tt>SELECT</tt>-<tt>FROM</tt>-<tt>WHERE</tt> subqueries; optimizations
* don't currently span multiple subqueries.
*/
public
class
CostBasedJoinPlanner
implements
Planner
{
/** A logging object for reporting anything interesting that happens. */
private
static
Logger
logger
=
LogManager
.
getLogger
(
CostBasedJoinPlanner
.
class
);
/** The storage manager used during query planning. */
protected
StorageManager
storageManager
;
/** Sets the server to be used during query planning. */
public
void
setStorageManager
(
StorageManager
storageManager
)
{
if
(
storageManager
==
null
)
throw
new
IllegalArgumentException
(
"storageManager cannot be null"
);
this
.
storageManager
=
storageManager
;
}
/**
* This helper class is used to keep track of one "join component" in the
* dynamic programming algorithm. A join component is simply a query plan
* for joining one or more leaves of the query.
* <p>
* In this context, a "leaf" may either be a base table or a subquery in
* the <tt>FROM</tt>-clause of the query. However, the planner will
* attempt to push conjuncts down the plan as far as possible, so even if
* a leaf is a base table, the plan may be a bit more complex than just a
* single file-scan.
*/
private
static
class
JoinComponent
{
/**
* This is the join plan itself, that joins together all leaves
* specified in the {@link #leavesUsed} field.
*/
public
PlanNode
joinPlan
;
/**
* This field specifies the collection of leaf-plans that are joined by
* the plan in this join-component.
*/
public
HashSet
<
PlanNode
>
leavesUsed
;
/**
* This field specifies the collection of all conjuncts use by this join
* plan. It allows us to easily determine what join conjuncts still
* remain to be incorporated into the query.
*/
public
HashSet
<
Expression
>
conjunctsUsed
;
/**
* Constructs a new instance for a <em>leaf node</em>. It should not
* be used for join-plans that join together two or more leaves. This
* constructor simply adds the leaf-plan into the {@link #leavesUsed}
* collection.
*
* @param leafPlan the query plan for this leaf of the query.
*
* @param conjunctsUsed the set of conjuncts used by the leaf plan.
* This may be an empty set if no conjuncts apply solely to
* this leaf, or it may be nonempty if some conjuncts apply
* solely to this leaf.
*/
public
JoinComponent
(
PlanNode
leafPlan
,
HashSet
<
Expression
>
conjunctsUsed
)
{
leavesUsed
=
new
HashSet
<>();
leavesUsed
.
add
(
leafPlan
);
joinPlan
=
leafPlan
;
this
.
conjunctsUsed
=
conjunctsUsed
;
}
/**
* Constructs a new instance for a <em>non-leaf node</em>. It should
* not be used for leaf plans!
*
* @param joinPlan the query plan that joins together all leaves
* specified in the <tt>leavesUsed</tt> argument.
*
* @param leavesUsed the set of two or more leaf plans that are joined
* together by the join plan.
*
* @param conjunctsUsed the set of conjuncts used by the join plan.
* Obviously, it is expected that all conjuncts specified here
* can actually be evaluated against the join plan.
*/
public
JoinComponent
(
PlanNode
joinPlan
,
HashSet
<
PlanNode
>
leavesUsed
,
HashSet
<
Expression
>
conjunctsUsed
)
{
this
.
joinPlan
=
joinPlan
;
this
.
leavesUsed
=
leavesUsed
;
this
.
conjunctsUsed
=
conjunctsUsed
;
}
}
/**
* Returns the root of a plan tree suitable for executing the specified
* query.
*
* @param selClause an object describing the query to be performed
*
* @return a plan tree for executing the specified query
*/
public
PlanNode
makePlan
(
SelectClause
selClause
,
List
<
SelectClause
>
enclosingSelects
)
{
// TODO: Implement!
//
// This is a very rough sketch of how this function will work,
// focusing mainly on join planning:
//
// 1) Pull out the top-level conjuncts from the FROM and WHERE
// clauses on the query, since we will handle them in special ways
// if we have outer joins.
//
// 2) Create an optimal join plan from the top-level from-clause and
// the top-level conjuncts.
//
// 3) If there are any unused conjuncts, determine how to handle them.
//
// 4) Create a project plan-node if necessary.
//
// 5) Handle other clauses such as ORDER BY, LIMIT/OFFSET, etc.
//
// Supporting other query features, such as grouping/aggregation,
// various kinds of subqueries, queries without a FROM clause, etc.,
// can all be incorporated into this sketch relatively easily.
return
null
;
}
/**
* Given the top-level {@code FromClause} for a SELECT-FROM-WHERE block,
* this helper generates an optimal join plan for the {@code FromClause}.
*
* @param fromClause the top-level {@code FromClause} of a
* SELECT-FROM-WHERE block.
* @param extraConjuncts any extra conjuncts (e.g. from the WHERE clause,
* or HAVING clause)
* @return a {@code JoinComponent} object that represents the optimal plan
* corresponding to the FROM-clause
*/
private
JoinComponent
makeJoinPlan
(
FromClause
fromClause
,
Collection
<
Expression
>
extraConjuncts
)
{
// These variables receive the leaf-clauses and join conjuncts found
// from scanning the sub-clauses. Initially, we put the extra conjuncts
// into the collection of conjuncts.
HashSet
<
Expression
>
conjuncts
=
new
HashSet
<>();
ArrayList
<
FromClause
>
leafFromClauses
=
new
ArrayList
<>();
collectDetails
(
fromClause
,
conjuncts
,
leafFromClauses
);
logger
.
debug
(
"Making join-plan for "
+
fromClause
);
logger
.
debug
(
" Collected conjuncts: "
+
conjuncts
);
logger
.
debug
(
" Collected FROM-clauses: "
+
leafFromClauses
);
logger
.
debug
(
" Extra conjuncts: "
+
extraConjuncts
);
if
(
extraConjuncts
!=
null
)
conjuncts
.
addAll
(
extraConjuncts
);
// Make a read-only set of the input conjuncts, to avoid bugs due to
// unintended side-effects.
Set
<
Expression
>
roConjuncts
=
Collections
.
unmodifiableSet
(
conjuncts
);
// Create a subplan for every single leaf FROM-clause, and prepare the
// leaf-plan.
logger
.
debug
(
"Generating plans for all leaves"
);
ArrayList
<
JoinComponent
>
leafComponents
=
generateLeafJoinComponents
(
leafFromClauses
,
roConjuncts
);
// Print out the results, for debugging purposes.
if
(
logger
.
isDebugEnabled
())
{
for
(
JoinComponent
leaf
:
leafComponents
)
{
logger
.
debug
(
" Leaf plan:\n"
+
PlanNode
.
printNodeTreeToString
(
leaf
.
joinPlan
,
true
));
}
}
// Build up the full query-plan using a dynamic programming approach.
JoinComponent
optimalJoin
=
generateOptimalJoin
(
leafComponents
,
roConjuncts
);
PlanNode
plan
=
optimalJoin
.
joinPlan
;
logger
.
info
(
"Optimal join plan generated:\n"
+
PlanNode
.
printNodeTreeToString
(
plan
,
true
));
return
optimalJoin
;
}
/**
* This helper method pulls the essential details for join optimization
* out of a <tt>FROM</tt> clause.
*
* TODO: FILL IN DETAILS.
*
* @param fromClause the from-clause to collect details from
*
* @param conjuncts the collection to add all conjuncts to
*
* @param leafFromClauses the collection to add all leaf from-clauses to
*/
private
void
collectDetails
(
FromClause
fromClause
,
HashSet
<
Expression
>
conjuncts
,
ArrayList
<
FromClause
>
leafFromClauses
)
{
// TODO: IMPLEMENT
}
/**
* This helper method performs the first step of the dynamic programming
* process to generate an optimal join plan, by generating a plan for every
* leaf from-clause identified from analyzing the query. Leaf plans are
* usually very simple; they are built either from base-tables or
* <tt>SELECT</tt> subqueries. The most complex detail is that any
* conjuncts in the query that can be evaluated solely against a particular
* leaf plan-node will be associated with the plan node. <em>This is a
* heuristic</em> that usually produces good plans (and certainly will for
* the current state of the database), but could easily interfere with
* indexes or other plan optimizations.
*
* @param leafFromClauses the collection of from-clauses found in the query
*
* @param conjuncts the collection of conjuncts that can be applied at this
* level
*
* @return a collection of {@link JoinComponent} object containing the plans
* and other details for each leaf from-clause
*/
private
ArrayList
<
JoinComponent
>
generateLeafJoinComponents
(
Collection
<
FromClause
>
leafFromClauses
,
Collection
<
Expression
>
conjuncts
)
{
// Create a subplan for every single leaf FROM-clause, and prepare the
// leaf-plan.
ArrayList
<
JoinComponent
>
leafComponents
=
new
ArrayList
<>();
for
(
FromClause
leafClause
:
leafFromClauses
)
{
HashSet
<
Expression
>
leafConjuncts
=
new
HashSet
<>();
PlanNode
leafPlan
=
makeLeafPlan
(
leafClause
,
conjuncts
,
leafConjuncts
);
JoinComponent
leaf
=
new
JoinComponent
(
leafPlan
,
leafConjuncts
);
leafComponents
.
add
(
leaf
);
}
return
leafComponents
;
}
/**
* Constructs a plan tree for evaluating the specified from-clause.
* TODO: COMPLETE THE DOCUMENTATION
*
* @param fromClause the select nodes that need to be joined.
*
* @param conjuncts additional conjuncts that can be applied when
* constructing the from-clause plan.
*
* @param leafConjuncts this is an output-parameter. Any conjuncts
* applied in this plan from the <tt>conjuncts</tt> collection
* should be added to this out-param.
*
* @return a plan tree for evaluating the specified from-clause
*
* @throws IllegalArgumentException if the specified from-clause is a join
* expression that isn't an outer join, or has some other
* unrecognized type.
*/
private
PlanNode
makeLeafPlan
(
FromClause
fromClause
,
Collection
<
Expression
>
conjuncts
,
HashSet
<
Expression
>
leafConjuncts
)
{
// TODO: IMPLEMENT.
// If you apply any conjuncts then make sure to add them to the
// leafConjuncts collection.
//
// Don't forget that all from-clauses can specify an alias.
//
// Concentrate on properly handling cases other than outer
// joins first, then focus on outer joins once you have the
// typical cases supported.
return
null
;
}
/**
* This helper method builds up a full join-plan using a dynamic programming
* approach. The implementation maintains a collection of optimal
* intermediate plans that join <em>n</em> of the leaf nodes, each with its
* own associated cost, and then uses that collection to generate a new
* collection of optimal intermediate plans that join <em>n+1</em> of the
* leaf nodes. This process completes when all leaf plans are joined
* together; there will be <em>one</em> plan, and it will be the optimal
* join plan (as far as our limited estimates can determine, anyway).
*
* @param leafComponents the collection of leaf join-components, generated
* by the {@link #generateLeafJoinComponents} method.
*
* @param conjuncts the collection of all conjuncts found in the query
*
* @return a single {@link JoinComponent} object that joins all leaf
* components together in an optimal way.
*/
private
JoinComponent
generateOptimalJoin
(
ArrayList
<
JoinComponent
>
leafComponents
,
Set
<
Expression
>
conjuncts
)
{
// This object maps a collection of leaf-plans (represented as a
// hash-set) to the optimal join-plan for that collection of leaf plans.
//
// This collection starts out only containing the leaf plans themselves,
// and on each iteration of the loop below, join-plans are grown by one
// leaf. For example:
// * In the first iteration, all plans joining 2 leaves are created.
// * In the second iteration, all plans joining 3 leaves are created.
// * etc.
// At the end, the collection will contain ONE entry, which is the
// optimal way to join all N leaves. Go Go Gadget Dynamic Programming!
HashMap
<
HashSet
<
PlanNode
>,
JoinComponent
>
joinPlans
=
new
HashMap
<>();
// Initially populate joinPlans with just the N leaf plans.
for
(
JoinComponent
leaf
:
leafComponents
)
joinPlans
.
put
(
leaf
.
leavesUsed
,
leaf
);
while
(
joinPlans
.
size
()
>
1
)
{
logger
.
debug
(
"Current set of join-plans has "
+
joinPlans
.
size
()
+
" plans in it."
);
// This is the set of "next plans" we will generate. Plans only
// get stored if they are the first plan that joins together the
// specified leaves, or if they are better than the current plan.
HashMap
<
HashSet
<
PlanNode
>,
JoinComponent
>
nextJoinPlans
=
new
HashMap
<>();
// TODO: IMPLEMENT THE CODE THAT GENERATES OPTIMAL PLANS THAT
// JOIN N + 1 LEAVES
// Now that we have generated all plans joining N leaves, time to
// create all plans joining N + 1 leaves.
joinPlans
=
nextJoinPlans
;
}
// At this point, the set of join plans should only contain one plan,
// and it should be the optimal plan.
assert
joinPlans
.
size
()
==
1
:
"There can be only one optimal join plan!"
;
return
joinPlans
.
values
().
iterator
().
next
();
}
/**
* Constructs a simple select plan that reads directly from a table, with
* an optional predicate for selecting rows.
* <p>
* While this method can be used for building up larger <tt>SELECT</tt>
* queries, the returned plan is also suitable for use in <tt>UPDATE</tt>
* and <tt>DELETE</tt> command evaluation. In these cases, the plan must
* only generate tuples of type {@link edu.caltech.nanodb.storage.PageTuple},
* so that the command can modify or delete the actual tuple in the file's
* page data.
*
* @param tableName The name of the table that is being selected from.
*
* @param predicate An optional selection predicate, or {@code null} if
* no filtering is desired.
*
* @return A new plan-node for evaluating the select operation.
*/
public
SelectNode
makeSimpleSelect
(
String
tableName
,
Expression
predicate
,
List
<
SelectClause
>
enclosingSelects
)
{
if
(
tableName
==
null
)
throw
new
IllegalArgumentException
(
"tableName cannot be null"
);
if
(
enclosingSelects
!=
null
)
{
// If there are enclosing selects, this subquery's predicate may
// reference an outer query's value, but we don't detect that here.
// Therefore we will probably fail with an unrecognized column
// reference.
logger
.
warn
(
"Currently we are not clever enough to detect "
+
"correlated subqueries, so expect things are about to break..."
);
}
// Open the table.
TableInfo
tableInfo
=
storageManager
.
getTableManager
().
openTable
(
tableName
);
// Make a SelectNode to read rows from the table, with the specified
// predicate.
SelectNode
selectNode
=
new
FileScanNode
(
tableInfo
,
predicate
);
selectNode
.
prepare
();
return
selectNode
;
}
}
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment
Menu
Projects
Groups
Snippets
Help