Commit 87630d9d authored by Donald H. (Donnie) Pinkston, III
Browse files

Support classes for index scans in queries

The IndexScanNode supports performing index-scans against a table based
on a suitable predicate.

To facilitate use of this plan node, the AnalyzedPredicate and
IndexScanEndpoints classes support analyzing a predicate to determine
whether it is an equality lookup and/or a range scan, and which indexes
on a table may be useful for the scan.

Finally, the IndexInfo class has been updated to report whether an index
is a sequential index or a hash index.
parent 7afe6bee
No related merge requests found
Showing with 842 additions and 0 deletions
+842 -0
package edu.caltech.nanodb.indexes;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import edu.caltech.nanodb.expressions.ColumnName;
import edu.caltech.nanodb.expressions.ColumnValue;
import edu.caltech.nanodb.expressions.CompareOperator;
import edu.caltech.nanodb.expressions.Expression;
import edu.caltech.nanodb.expressions.LiteralValue;
import edu.caltech.nanodb.expressions.PredicateUtils;
import edu.caltech.nanodb.relations.ColumnInfo;
import edu.caltech.nanodb.relations.IndexColumnRefs;
import edu.caltech.nanodb.relations.Schema;
/**
 * <p>
 * This class takes a predicate or a collection of conjuncts and analyzes it
 * into a "range" component and an "other" component.  The "range" component
 * consists of "column op value" comparisons where op is one of {@code =},
 * {@code >}, {@code >=}, {@code <} or {@code <=}; these conjuncts specify
 * ranges on specific columns, and can be used to choose indexes for query
 * planning.  The "other" component is all other conjuncts, including
 * "column != value" comparisons (which do not bound a range),
 * "column op column" conjuncts, other functions and operations (such as
 * string-matching operations, <tt>IS NULL</tt> tests, etc.), nested
 * disjunctions, and so forth.
 * </p>
 * <p>
 * This class expects that expressions have been both normalized and
 * simplified.  It is designed to work with conjunctive selection predicates
 * (i.e. a series of conditions ANDed together), and will not be particularly
 * effective with other kinds of predicates.
 * </p>
 */
public class AnalyzedPredicate {
    /**
     * Records the starting and ending points of a range component of a query
     * predicate.  Only conjuncts of the form "column op value" are
     * represented, and it is possible that either the start or the end is
     * missing if there is no corresponding conjunct.
     */
    public static class RangeEndpoints {
        /**
         * A starting endpoint on an attribute.  The value will be a
         * comparison with a =, >= or > operator.
         */
        public CompareOperator startExpr;

        /**
         * An ending endpoint on an attribute.  The value will be a comparison
         * with a =, <= or < operator.
         */
        public CompareOperator endExpr;

        /**
         * Records the given comparison as the start and/or end of this
         * range, depending on its operator.  An equality comparison sets
         * both endpoints.  Comparisons with any other operator than
         * =, >=, >, <= or < are ignored.
         *
         * @param cmp a comparison whose left side is a column reference and
         *        whose right side is a literal
         *
         * @throws IllegalArgumentException if {@code cmp} is {@code null},
         *         or is not of the form "column op literal"
         */
        private void addCompare(CompareOperator cmp) {
            if (cmp == null)
                throw new IllegalArgumentException("cmp cannot be null");

            if (!(cmp.getLeftExpression() instanceof ColumnValue)) {
                throw new IllegalArgumentException(
                    "cmp LHS must be a ColumnValue");
            }

            if (!(cmp.getRightExpression() instanceof LiteralValue)) {
                throw new IllegalArgumentException(
                    "cmp RHS must be a LiteralValue");
            }

            CompareOperator.Type type = cmp.getType();

            // TODO(donnie):  What if the startExpr or endExpr is already set?
            //                Need to see if the condition is impossible to
            //                satisfy.  For now a later comparison simply
            //                replaces an earlier one.
            switch (type) {
            case EQUALS:
                startExpr = cmp;
                endExpr = cmp;
                break;

            case GREATER_OR_EQUAL:
            case GREATER_THAN:
                startExpr = cmp;
                break;

            case LESS_OR_EQUAL:
            case LESS_THAN:
                endExpr = cmp;
                break;

            default:
                break;  // Do nothing.
            }
        }

        /** Returns the starting comparison, or {@code null} if none. */
        CompareOperator getStartCondition() {
            return startExpr;
        }

        /** Returns the ending comparison, or {@code null} if none. */
        CompareOperator getEndCondition() {
            return endExpr;
        }

        /**
         * Evaluates the right-hand side of the starting comparison.
         *
         * @return the evaluated value, or {@code null} if there is no
         *         starting comparison
         */
        Object getStartValue() {
            Object result = null;
            if (startExpr != null)
                result = startExpr.getRightExpression().evaluate();

            return result;
        }

        /**
         * Evaluates the right-hand side of the ending comparison.
         *
         * @return the evaluated value, or {@code null} if there is no
         *         ending comparison
         */
        Object getEndValue() {
            Object result = null;
            if (endExpr != null)
                result = endExpr.getRightExpression().evaluate();

            return result;
        }
    }


    /**
     * These conjuncts are specifically of the form "column op value" with a
     * range operator (=, >=, >, <= or <), and therefore may be useful for
     * choosing an index.  They are keyed by the column they constrain.
     */
    HashMap<ColumnName, RangeEndpoints> rangeConjuncts = new HashMap<>();

    /**
     * Other conjuncts in the predicate that were not recorded as range
     * endpoints, in the order they were encountered.
     */
    ArrayList<Expression> otherConjuncts = new ArrayList<>();


    /**
     * Analyzes a single predicate by breaking it into its conjuncts.
     *
     * @param predicate the predicate to analyze
     */
    public AnalyzedPredicate(Expression predicate) {
        addPredicate(predicate);
    }


    /**
     * Analyzes a collection of conjuncts.
     *
     * @param conjuncts the conjuncts to analyze
     */
    public AnalyzedPredicate(Collection<Expression> conjuncts) {
        addConjuncts(conjuncts);
    }


    /** Collects the conjuncts of the predicate and classifies each one. */
    private void addPredicate(Expression predicate) {
        HashSet<Expression> conjuncts = new HashSet<>();
        PredicateUtils.collectConjuncts(predicate, conjuncts);
        addConjuncts(conjuncts);
    }


    /** Classifies every conjunct in the collection. */
    private void addConjuncts(Collection<Expression> conjuncts) {
        for (Expression e : conjuncts)
            addConjunct(e);
    }


    /**
     * Reports whether the comparison's operator bounds a range, i.e. is one
     * of the operators that {@link RangeEndpoints} records.
     */
    private static boolean isRangeComparison(CompareOperator cmp) {
        switch (cmp.getType()) {
        case EQUALS:
        case GREATER_OR_EQUAL:
        case GREATER_THAN:
        case LESS_OR_EQUAL:
        case LESS_THAN:
            return true;

        default:
            return false;
        }
    }


    /**
     * Classifies one conjunct as either a range endpoint on some column or
     * an "other" conjunct.
     */
    private void addConjunct(Expression conjunct) {
        if (conjunct instanceof CompareOperator) {
            CompareOperator cmp = (CompareOperator) conjunct;

            // Only comparisons that actually bound a range are recorded as
            // endpoints.  Anything else (e.g. "column != value") must fall
            // through to the "other" list; otherwise the conjunct would be
            // lost and an empty RangeEndpoints would be registered for the
            // column, making canUseIndex() think the column is constrained.
            if (cmp.getLeftExpression() instanceof ColumnValue &&
                cmp.getRightExpression() instanceof LiteralValue &&
                isRangeComparison(cmp)) {

                ColumnValue lhs = (ColumnValue) cmp.getLeftExpression();
                RangeEndpoints range = rangeConjuncts.computeIfAbsent(
                    lhs.getColumnName(), k -> new RangeEndpoints());

                range.addCompare(cmp);
                return;
            }
        }

        otherConjuncts.add(conjunct);
    }


    /**
     * Determines whether the analyzed predicate can drive a scan of an index
     * with the given type and columns.
     *
     * @param schema the schema used to resolve the index's column numbers
     *        into column names
     * @param indexType the kind of index being considered
     * @param colRefs the columns that make up the index, in index order
     *
     * @return the endpoints to use for the index scan, or {@code null} if
     *         the index cannot be used with this predicate
     */
    public IndexScanEndpoints canUseIndex(Schema schema, IndexType indexType,
                                          IndexColumnRefs colRefs) {
        ArrayList<RangeEndpoints> ranges = new ArrayList<>();

        for (int iCol : colRefs.getCols()) {
            ColumnInfo colInfo = schema.getColumnInfo(iCol);
            ColumnName colName = colInfo.getColumnName();

            // Are there range endpoints for this column?
            RangeEndpoints colEndpoints = rangeConjuncts.get(colName);
            if (colEndpoints == null) {
                // No range-endpoints for this column.

                // Hash indexes require all columns to be present.
                if (indexType == IndexType.HASHED_INDEX)
                    return null;

                // If we are here then the index is a sequential index, and
                // we can function with some prefix of the index.  Make sure
                // we have found at least one range so far.
                assert indexType == IndexType.ORDERED_INDEX;

                if (ranges.isEmpty()) {
                    // We don't have any ranges!  Can't use this index.
                    return null;
                }

                // If we are here then the index is a sequential index and we
                // have some prefix of the index.
                break;
            }
            else {
                // We have range endpoints for this column.

                // Hash indexes require an equality test on all columns.  An
                // EQUALS comparison is recorded as the start-expression, so
                // that is the endpoint we check.
                if (indexType == IndexType.HASHED_INDEX) {
                    if (colEndpoints.startExpr == null ||
                        colEndpoints.startExpr.getType() !=
                            CompareOperator.Type.EQUALS) {
                        // This comparison is either absent or not EQUALS.
                        // Can't use the index.
                        return null;
                    }
                }

                // If we got here, we can use this range with the index.
                ranges.add(colEndpoints);
            }
        }

        // If we got here, we have a series of one or more range tests that
        // we can use against the index!  Pack it up and send it back to the
        // caller.
        assert !ranges.isEmpty();
        return new IndexScanEndpoints(ranges);
    }
}
......@@ -4,6 +4,8 @@ package edu.caltech.nanodb.indexes;
import edu.caltech.nanodb.relations.IndexColumnRefs;
import edu.caltech.nanodb.relations.Schema;
import edu.caltech.nanodb.relations.TableInfo;
import edu.caltech.nanodb.storage.HashedTupleFile;
import edu.caltech.nanodb.storage.SequentialTupleFile;
import edu.caltech.nanodb.storage.TupleFile;
......@@ -82,6 +84,18 @@ public class IndexInfo {
}
/**
 * Reports the kind of index this is, based on the kind of tuple file that
 * backs it.
 *
 * @return {@code ORDERED_INDEX} for a sequential tuple file,
 *         {@code HASHED_INDEX} for a hashed tuple file, or {@code null} if
 *         the tuple file is neither
 */
public IndexType getIndexType() {
    if (tupleFile instanceof SequentialTupleFile)
        return IndexType.ORDERED_INDEX;

    if (tupleFile instanceof HashedTupleFile)
        return IndexType.HASHED_INDEX;

    // Neither a sequential nor a hashed file (this includes no file at all).
    return null;
}
/**
 * Returns the table-info object held by this index-info.
 *
 * @return the value of the {@code tableInfo} field
 */
public TableInfo getTableInfo() {
    return this.tableInfo;
}
......
package edu.caltech.nanodb.indexes;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import edu.caltech.nanodb.expressions.Expression;
import edu.caltech.nanodb.expressions.LiteralValue;
import edu.caltech.nanodb.expressions.PredicateUtils;
/**
 * <p>
 * This class represents endpoint information relative to a specific
 * index.  This includes a predicate for identifying the starting tuple
 * from the index's contents, and a predicate for identifying where tuples
 * from the index stop satisfying the predicate.
 * </p>
 * <p>
 * There are also two lists of values for constructing search-key tuples
 * for the initial lookup against the index.  For a hash index the
 * search-key will include a value for every column in the index; for an
 * ordered index the search-key value may be any prefix of the index's
 * search-key.
 * </p>
 */
public class IndexScanEndpoints {
    /**
     * This hash-set holds all conjuncts used to generate the starting and
     * ending predicates.
     */
    private final HashSet<Expression> conjunctsUsed = new HashSet<>();

    /**
     * A predicate that can be used to identify the first tuple in an
     * index's tuple-sequence that satisfies a query's predicate.
     */
    private final Expression startPredicate;

    /**
     * A predicate that can be used to identify the remaining tuples in an
     * index's tuple-sequence that satisfies a query's predicate.
     */
    private final Expression endPredicate;

    /**
     * A list of values that can be used for performing an index lookup
     * to find the starting point in the tuple sequence.
     */
    private final ArrayList<Object> startValues;

    /**
     * A list of values that can be used for performing an index lookup
     * to find the ending point in the tuple sequence.
     */
    private final ArrayList<Object> endValues;


    /**
     * Initialize a new index-endpoints object that can be used to find the
     * starting and ending points in an index for retrieving tuples that
     * satisfy a specific predicate.
     *
     * @param ranges a list of {@code RangeEndpoints} objects describing
     *        specific columns that are referenced by a predicate, one per
     *        index column, in index-column order
     *
     * @throws IllegalArgumentException if {@code ranges} is {@code null} or
     *         empty
     */
    public IndexScanEndpoints(List<AnalyzedPredicate.RangeEndpoints> ranges) {
        if (ranges == null)
            throw new IllegalArgumentException("ranges cannot be null");

        if (ranges.isEmpty()) {
            throw new IllegalArgumentException(
                "ranges must contain at least one range");
        }

        ArrayList<Expression> startExprs = new ArrayList<>();
        ArrayList<Expression> endExprs = new ArrayList<>();

        startValues = new ArrayList<>();
        endValues = new ArrayList<>();

        // The lookup values must line up with the index's columns, so each
        // list may only be a contiguous prefix.  Once a column contributes
        // no value -- because it has no endpoint on that side, or because
        // its literal evaluates to null -- no later column may contribute
        // one either; otherwise its value would land in the wrong position
        // of the search key.
        boolean startKeyOpen = true;
        boolean endKeyOpen = true;

        for (AnalyzedPredicate.RangeEndpoints range : ranges) {
            if (range.startExpr == null) {
                startKeyOpen = false;
            }
            else {
                // Every available comparison still goes into the predicate,
                // even if its value can no longer extend the search key.
                startExprs.add(range.startExpr);

                if (startKeyOpen) {
                    LiteralValue litVal =
                        (LiteralValue) range.startExpr.getRightExpression();
                    Object value = litVal.evaluate();
                    if (value != null)
                        startValues.add(value);
                    else
                        startKeyOpen = false;
                }
            }

            if (range.endExpr == null) {
                endKeyOpen = false;
            }
            else {
                endExprs.add(range.endExpr);

                if (endKeyOpen) {
                    LiteralValue litVal =
                        (LiteralValue) range.endExpr.getRightExpression();
                    Object value = litVal.evaluate();
                    if (value != null)
                        endValues.add(value);
                    else
                        endKeyOpen = false;
                }
            }
        }

        // If either the start or the end predicate has no conjuncts, the
        // corresponding predicate will be set to null.
        startPredicate = PredicateUtils.makePredicate(startExprs);
        endPredicate = PredicateUtils.makePredicate(endExprs);

        // Record what conjuncts were used, if any.
        conjunctsUsed.addAll(startExprs);
        conjunctsUsed.addAll(endExprs);
    }


    /** Returns the predicate built from the starting comparisons. */
    public Expression getStartPredicate() {
        return startPredicate;
    }


    /** Returns the predicate built from the ending comparisons. */
    public Expression getEndPredicate() {
        return endPredicate;
    }


    /**
     * Returns a read-only view of every conjunct that went into the
     * starting or ending predicate.
     */
    public Set<Expression> getConjunctsUsed() {
        return Collections.unmodifiableSet(conjunctsUsed);
    }


    /** Returns a read-only view of the starting search-key values. */
    public List<Object> getStartValues() {
        return Collections.unmodifiableList(startValues);
    }


    /** Returns a read-only view of the ending search-key values. */
    public List<Object> getEndValues() {
        return Collections.unmodifiableList(endValues);
    }


    @Override
    public String toString() {
        return "IndexScanEndpoints[start:  " + startPredicate + ", end:  " +
            endPredicate + ", startValues:  " + startValues +
            ", endValues:  " + endValues + "]";
    }
}
//~ Include file if #full.
package edu.caltech.nanodb.plannodes;
import java.util.List;
import java.util.Objects;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import edu.caltech.nanodb.expressions.Expression;
import edu.caltech.nanodb.expressions.OrderByExpression;
import edu.caltech.nanodb.expressions.TupleLiteral;
import edu.caltech.nanodb.indexes.IndexInfo;
import edu.caltech.nanodb.indexes.IndexManager;
import edu.caltech.nanodb.indexes.IndexScanEndpoints;
import edu.caltech.nanodb.queryeval.PlanCost;
import edu.caltech.nanodb.queryeval.TableStats;
import edu.caltech.nanodb.relations.Schema;
import edu.caltech.nanodb.relations.Tuple;
import edu.caltech.nanodb.storage.FilePointer;
import edu.caltech.nanodb.storage.HashedTupleFile;
import edu.caltech.nanodb.storage.SequentialTupleFile;
import edu.caltech.nanodb.storage.TupleFile;
/**
 * A select plan-node that uses an index to access the tuples in a tuple file,
 * checking the optional predicate against the values in the index.
 */
public class IndexScanNode extends PlanNode {
    /** A logging object for reporting anything interesting that happens. */
    private static final Logger logger =
        LogManager.getLogger(IndexScanNode.class);


    /** The index-info for the index being scanned. */
    private IndexInfo indexInfo;


    /** The index being used for the index scan. */
    private TupleFile indexTupleFile;


    /**
     * The table that the index is built against.  This is used to resolve
     * the index-tuples into the table-file tuples.
     */
    private TupleFile tableTupleFile;


    /**
     * This is the value to use when looking up the first tuple in the index.
     * Note that this may not be the first tuple that this node returns.  For
     * example, if the predicate is "<tt>a &gt; 5</tt>" then the initial
     * lookup value will be "<tt>a == 5</tt>", but that lookup may not return
     * a row that satisfies the predicate.  This is the purpose of
     * {@link #startPredicate}.
     */
    private TupleLiteral startLookupValue;


    /**
     * This is the predicate that the first tuple in the index-scan must
     * satisfy.  Note that all tuples must also satisfy the
     * {@link #endPredicate} predicate.  Note also that this may be
     * {@code null} if the index-scan must start with the first tuple in the
     * index.
     */
    private Expression startPredicate;


    /**
     * This is the predicate that all tuples in the index-scan must satisfy.
     * It is named "endPredicate" because when it becomes false, the node has
     * reached the end of the index-scan.  Note that this may be {@code null}
     * if there is no ending condition, in which case the scan continues
     * until the index has no more tuples.
     */
    private Expression endPredicate;


    /**
     * The current tuple from the index that is being used.  Note that this is
     * not what {@link #getNextTuple} returns; rather, it's the index-record
     * used to look up that tuple.
     */
    private Tuple currentIndexTuple;


    /**
     * The index must have one column named "<tt>#TUPLE_PTR</tt>"; this is the
     * column-index of that column in the index's schema.
     */
    private int idxTuplePtr;


    /** True if we have finished scanning or pulling tuples from children. */
    private boolean done;


    /**
     * This field allows the index-scan node to mark a particular tuple in the
     * tuple-stream and then rewind to that point in the tuple-stream.
     */
    private FilePointer markedTuple;


    /**
     * Set by {@link #resetToLastMark} to tell the next call to
     * {@link #getNextTuple} to resume from {@link #markedTuple}.
     */
    private boolean jumpToMarkedTuple;


    /**
     * Construct an index scan node that scans an index between the given
     * endpoints.
     *
     * @param indexInfo the information about the index being used
     * @param indexEndpoints the start/end predicates and the starting lookup
     *        values for the scan
     *
     * @throws IllegalArgumentException if either argument is {@code null},
     *         or if the index has no tuple-pointer column
     */
    public IndexScanNode(IndexInfo indexInfo,
                         IndexScanEndpoints indexEndpoints) {
        super();

        if (indexInfo == null)
            throw new IllegalArgumentException("indexInfo cannot be null");

        if (indexEndpoints == null)
            throw new IllegalArgumentException("indexEndpoints cannot be null");

        // Store the index endpoints, and a TupleLiteral of where to start
        // looking in the index.
        startPredicate = indexEndpoints.getStartPredicate();
        endPredicate = indexEndpoints.getEndPredicate();
        startLookupValue =
            new TupleLiteral(indexEndpoints.getStartValues().toArray());

        // Pull out the tuple-file for the index, as well as the tuple-file
        // for the table that the index references.
        this.indexInfo = indexInfo;
        indexTupleFile = indexInfo.getTupleFile();
        tableTupleFile = indexInfo.getTableInfo().getTupleFile();

        // Figure out which column in the index is the tuple-pointer column.
        Schema idxSchema = indexTupleFile.getSchema();
        idxTuplePtr = idxSchema.getColumnIndex(IndexManager.COLNAME_TUPLEPTR);
        if (idxTuplePtr == -1) {
            throw new IllegalArgumentException("Index must have a column " +
                "named " + IndexManager.COLNAME_TUPLEPTR);
        }
    }


    /**
     * Returns true if the passed-in object is an <tt>IndexScanNode</tt> with
     * the same index tuple-file, predicates and starting lookup value.
     *
     * @param obj the object to check for equality
     *
     * @return true if the passed-in object is equal to this object; false
     *         otherwise
     */
    @Override
    public boolean equals(Object obj) {
        if (obj instanceof IndexScanNode) {
            IndexScanNode other = (IndexScanNode) obj;
            // We don't include the table-info or the index-info since each
            // table or index is in its own tuple file.
            return indexTupleFile.equals(other.indexTupleFile) &&
                Objects.equals(startPredicate, other.startPredicate) &&
                Objects.equals(endPredicate, other.endPredicate) &&
                Objects.equals(startLookupValue, other.startLookupValue);
        }

        return false;
    }


    /**
     * Computes the hashcode of a PlanNode.  This method is used to see if two
     * plan nodes CAN be equal.  It uses the same fields as {@link #equals}.
     **/
    @Override
    public int hashCode() {
        int hash = 7;
        // We don't include the index-info since each index is in its own
        // tuple file.
        hash = 31 * hash + indexTupleFile.hashCode();
        hash = 31 * hash + Objects.hashCode(startPredicate);
        hash = 31 * hash + Objects.hashCode(endPredicate);
        hash = 31 * hash + Objects.hashCode(startLookupValue);
        return hash;
    }


    /**
     * Creates a copy of this index-scan node.  This method is used by
     * {@link PlanNode#duplicate} to copy a plan tree.
     */
    @Override
    protected PlanNode clone() throws CloneNotSupportedException {
        IndexScanNode node = (IndexScanNode) super.clone();

        // TODO:  Should we clone these?
        node.indexInfo = indexInfo;

        // The tuple file doesn't need to be copied since it's immutable.
        node.indexTupleFile = indexTupleFile;

        return node;
    }


    @Override
    public String toString() {
        StringBuilder buf = new StringBuilder();

        buf.append("IndexScan[");
        buf.append("index:  ").append(indexInfo.getTableName());
        buf.append('.').append(indexInfo.getIndexName());

        buf.append(", startLookup:  ").append(startLookupValue);
        buf.append(", startPred:  ").append(startPredicate);
        buf.append(", endPred:  ").append(endPredicate);

        buf.append("]");

        return buf.toString();
    }


    /**
     * Currently this node always reports that it produces unsorted results,
     * by returning {@code null}.
     */
    public List<OrderByExpression> resultsOrderedBy() {
        return null;
    }


    /** This node supports marking. */
    public boolean supportsMarking() {
        return true;
    }


    /**
     * Sets this node's schema.  The node hands back tuples fetched from the
     * table file (see {@link #getNextTuple}), so the schema is the table's,
     * matching what {@link #prepare} records.
     */
    protected void prepareSchema() {
        // Grab the schema from the table.
        schema = tableTupleFile.getSchema();
    }


    // Inherit javadocs from base class.
    public void prepare() {
        // Grab the schema and statistics from the table file.

        schema = tableTupleFile.getSchema();

        // TODO:  We should also update the table statistics based on what the
        //        index scan is going to do, but that's too complicated, so
        //        we'll leave them unchanged for now.
        TableStats tableStats = tableTupleFile.getStats();
        stats = tableStats.getAllColumnStats();

        // TODO:  Cost the plan node
        cost = null;
    }


    @Override
    public void initialize() {
        super.initialize();

        currentIndexTuple = null;
        done = false;

        // Reset our marking state.
        markedTuple = null;
        jumpToMarkedTuple = false;
    }


    /**
     * Advances the index scan by one index-tuple and returns the table
     * tuple that the index-tuple points to.
     *
     * @return the next table tuple, or {@code null} once the scan is
     *         finished
     */
    @Override
    public Tuple getNextTuple() {
        if (done)
            return null;

        if (jumpToMarkedTuple) {
            logger.debug("Resuming at previously marked tuple.");
            currentIndexTuple = indexTupleFile.getTuple(markedTuple);
            jumpToMarkedTuple = false;
        }
        else if (currentIndexTuple == null) {
            // Navigate to the first tuple.
            currentIndexTuple = findFirstIndexTuple();
        }
        else {
            // Go ahead and navigate to the next tuple.
            currentIndexTuple = findNextIndexTuple(currentIndexTuple);
        }

        if (currentIndexTuple == null) {
            // Nothing (more) to return.  Remember that, so that later calls
            // don't repeat the initial index lookup.
            done = true;
            return null;
        }

        // Now, look up the table tuple based on the index tuple's
        // file-pointer.
        FilePointer tuplePtr =
            (FilePointer) currentIndexTuple.getColumnValue(idxTuplePtr);
        currentIndexTuple.unpin();

        return tableTupleFile.getTuple(tuplePtr);
    }


    /**
     * This method finds the starting tuple in the index, based on the
     * starting predicate and the starting value, as determined from the
     * query predicate.  The index-tuple is also verified against the ending
     * predicate, just in case the two predicates are mutually exclusive.
     * A {@code null} starting or ending predicate is treated as "no
     * condition".
     *
     * @return the first tuple in the index that satisfies the starting
     *         criteria, or {@code null} if no tuples in the index satisfy
     *         the criteria.
     *
     * @throws IllegalStateException if the index's tuple file is neither a
     *         sequential nor a hashed tuple file
     */
    private Tuple findFirstIndexTuple() {
        Tuple tup;

        // Navigate to first index-tuple based on start-value to lookup
        // NOTE(review):  both branches do an equality lookup; presumably a
        // null result means no index entry equals the lookup value, which
        // would end a range scan early -- confirm against the tuple-file API.
        if (indexTupleFile instanceof SequentialTupleFile) {
            SequentialTupleFile seqTupFile =
                (SequentialTupleFile) indexTupleFile;
            tup = seqTupFile.findFirstTupleEquals(startLookupValue);
        }
        else if (indexTupleFile instanceof HashedTupleFile) {
            HashedTupleFile hashTupFile =
                (HashedTupleFile) indexTupleFile;
            tup = hashTupFile.findFirstTupleEquals(startLookupValue);
        }
        else {
            throw new IllegalStateException("indexTupleFile is neither a " +
                "SequentialTupleFile or a HashedTupleFile (got " +
                indexTupleFile.getClass().getName() + ")");
        }

        // While our start-predicate isn't true, advance the current index
        // tuple.  The loop also ends if we run off the end of the index.
        while (tup != null) {
            environment.clear();
            environment.addTuple(indexInfo.getSchema(), tup);

            if (startPredicate == null ||
                startPredicate.evaluatePredicate(environment)) {
                break;
            }

            tup.unpin();
            tup = indexTupleFile.getNextTuple(tup);
        }

        // Either the initial lookup found nothing, or no tuple satisfied the
        // start-predicate.  No tuples.
        if (tup == null)
            return null;

        // Also check the ending predicate.  This is only necessary if the
        // start- and end-points overlap, and therefore the plan-node actually
        // should output nothing.  Presumably this will be caught in the code
        // that runs before this plan-node.  The environment still holds tup
        // from the loop above.
        if (endPredicate != null &&
            !endPredicate.evaluatePredicate(environment)) {
            tup.unpin();
            tup = null;
        }

        // Now we are at the proper starting point in the index tuple sequence
        return tup;
    }


    /**
     * Given the "current" tuple in the index, this method finds the next
     * tuple in the index, ensuring that it also satisfies the ending
     * predicate.  A {@code null} ending predicate accepts every tuple.
     *
     * @param tuple the "current" tuple in the index
     *
     * @return the next tuple in the index that follows the specified tuple,
     *         or {@code null} if no more tuples in the index satisfy the
     *         criteria.
     */
    private Tuple findNextIndexTuple(Tuple tuple) {
        // Get the next tuple from the index file.  If it still satisfies the
        // ending predicate then return the tuple.
        Tuple tup = indexTupleFile.getNextTuple(tuple);
        if (tup != null && endPredicate != null) {
            environment.clear();
            environment.addTuple(indexInfo.getSchema(), tup);

            if (!endPredicate.evaluatePredicate(environment)) {
                tup.unpin();
                tup = null;
            }
        }

        return tup;
    }


    public void cleanUp() {
        // Nothing to do!
    }


    /**
     * Remembers the current index-tuple's position so that the scan can
     * later be rewound to it.
     *
     * @throws IllegalStateException if there is no current tuple
     */
    public void markCurrentPosition() {
        if (currentIndexTuple == null)
            throw new IllegalStateException("There is no current tuple!");

        logger.debug("Marking current position in tuple-stream.");
        markedTuple = currentIndexTuple.getExternalReference();
    }


    /**
     * Arranges for the next call to {@link #getNextTuple} to resume from the
     * most recently marked position.
     *
     * @throws IllegalStateException if no position has been marked
     */
    public void resetToLastMark() {
        if (markedTuple == null)
            throw new IllegalStateException("There is no last-marked tuple!");

        logger.debug("Resetting to previously marked position in tuple-stream.");
        jumpToMarkedTuple = true;

        // The scan may have run to completion since the mark was taken;
        // rewinding has to make the node produce tuples again.
        done = false;
    }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment