From 7afe6bee71ebc1b7c2ea1809963915914349333b Mon Sep 17 00:00:00 2001
From: Donnie Pinkston <donnie@cms.caltech.edu>
Date: Mon, 25 Feb 2019 09:21:49 -0800
Subject: [PATCH] Add comparison modes to TupleComparator

The TupleComparator has now been upgraded to support multiple comparison
modes when performing partial comparisons.
---
 .../nanodb/expressions/TupleComparator.java   | 74 ++++++++++++++-----
 .../expressions/TestTupleComparator.java      | 65 ++++++++++++++--
 2 files changed, 112 insertions(+), 27 deletions(-)

diff --git a/src/main/java/edu/caltech/nanodb/expressions/TupleComparator.java b/src/main/java/edu/caltech/nanodb/expressions/TupleComparator.java
index 1e99bab..d596390 100644
--- a/src/main/java/edu/caltech/nanodb/expressions/TupleComparator.java
+++ b/src/main/java/edu/caltech/nanodb/expressions/TupleComparator.java
@@ -20,6 +20,38 @@ import edu.caltech.nanodb.util.Pair;
  */
 public class TupleComparator implements Comparator<Tuple> {
 
+    /**
+     * This enumeration specifies available comparison modes that the
+     * {@link TupleComparator} is able to use when comparing tuples.
+     */
+    public enum CompareMode {
+        /**
+         * It is an error to compare two tuples of different lengths (i.e.
+         * number of columns).
+         */
+        SAME_LENGTH,
+
+        /**
+         * When different-length tuples are compared, the extra column-values
+         * in the longer tuple are completely ignored.  For example, a tuple
+         * <tt>[1, 'A']</tt> will compare as equal to another tuple
+         * <tt>[1, 'A', 532]</tt>.  A consequence of this is that the empty
+         * tuple <tt>[]</tt> will compare as equal to all other tuples.
+         */
+        IGNORE_LENGTH,
+
+        /**
+         * When different-length tuples are compared, if the leading columns
+         * have the same values then the shorter tuple will compare as "less."
+         * For example, a tuple <tt>[1, 'A']</tt> will compare as less than
+         * another tuple <tt>[1, 'A', 532]</tt>.  A consequence of this is
+         * that the empty tuple <tt>[]</tt> will compare as less than all
+         * other tuples.
+         */
+        SHORTER_IS_LESS
+    }
+
+
     /**
      * The schema of the tuples that will be compared by this comparator object.
      */
@@ -244,7 +276,7 @@ public class TupleComparator implements Comparator<Tuple> {
      *         sizes.
      */
     public static int compareTuples(Tuple t1, Tuple t2) {
-        return _compareTuples(t1, t2, false);
+        return _compareTuples(t1, t2, CompareMode.SAME_LENGTH);
     }
 
 
@@ -279,7 +311,13 @@ public class TupleComparator implements Comparator<Tuple> {
      *         the two inputs
      */
     public static int comparePartialTuples(Tuple t1, Tuple t2) {
-        return _compareTuples(t1, t2, true);
+        return comparePartialTuples(t1, t2, CompareMode.IGNORE_LENGTH);
+    }
+
+
+    public static int comparePartialTuples(Tuple t1, Tuple t2,
+                                           CompareMode compareMode) {
+        return _compareTuples(t1, t2, compareMode);
     }
 
 
@@ -293,44 +331,37 @@ public class TupleComparator implements Comparator<Tuple> {
      *
      * @param t2 the second tuple to compare.  Must not be {@code null}.
      *
-     * @param allowSizeMismatch true if the two tuples are allowed to be
-     *        different sizes, or false if they must be the same size.
+     * @param compareMode specifies how to handle tuples of different sizes.
      *
      * @return a negative, positive, or zero value indicating the ordering of
      *         the two inputs
      */
     @SuppressWarnings("unchecked")
     private static int _compareTuples(Tuple t1, Tuple t2,
-                                      boolean allowSizeMismatch) {
+                                      CompareMode compareMode) {
         if (t1 == null)
             throw new IllegalArgumentException("t1 cannot be null");
 
         if (t2 == null)
             throw new IllegalArgumentException("t2 cannot be null");
 
+        if (compareMode == null)
+            throw new IllegalArgumentException("compareMode cannot be null");
+
         int t1Size = t1.getColumnCount();
         int t2Size = t2.getColumnCount();
 
-        if (!allowSizeMismatch) {
+        if (compareMode == CompareMode.SAME_LENGTH) {
             if (t1Size != t2Size)
                 throw new IllegalArgumentException("tuples must be the same size");
         }
-        else {
-            // If one of the tuples is an empty tuple, and the other one is
-            // not, we define this as the empty tuple being less than the
-            // non-empty tuple.
-            if (t1Size == 0 || t2Size == 0)
-                return t1Size - t2Size;
-
-            // Now we know that both tuples have at least one column.
-            // Only compare the columns that are present in both tuples.
-            t1Size = Math.min(t1Size, t2Size);
-            t2Size = t1Size;
-        }
+
+        // Only compare the columns that are present in both tuples.
+        int compareCols = Math.min(t1Size, t2Size);
 
         int compareResult = 0;
         int i = 0;
-        while (i < t1Size && compareResult == 0) {
+        while (i < compareCols && compareResult == 0) {
             Object objA = t1.getColumnValue(i);
             Object objB = t2.getColumnValue(i);
 
@@ -358,6 +389,11 @@ public class TupleComparator implements Comparator<Tuple> {
             i++;
         }
 
+        if (compareResult == 0 && t1Size != t2Size &&
+            compareMode == CompareMode.SHORTER_IS_LESS) {
+            compareResult = t1Size - t2Size;
+        }
+
         return compareResult;
     }
 }
diff --git a/src/test/java/edu/caltech/test/nanodb/expressions/TestTupleComparator.java b/src/test/java/edu/caltech/test/nanodb/expressions/TestTupleComparator.java
index 66d6bdd..a5ac178 100644
--- a/src/test/java/edu/caltech/test/nanodb/expressions/TestTupleComparator.java
+++ b/src/test/java/edu/caltech/test/nanodb/expressions/TestTupleComparator.java
@@ -285,7 +285,7 @@ public class TestTupleComparator {
     }
 
 
-    public void testComparePartialTuplesDifferentLengths() {
+    public void testComparePartialTuplesIgnoreLength() {
         TupleLiteral t0 = new TupleLiteral();
         TupleLiteral t1 = new TupleLiteral(new Object[]{null});
         TupleLiteral t2 = new TupleLiteral(1);
@@ -297,16 +297,17 @@ public class TestTupleComparator {
         assert TupleComparator.comparePartialTuples(t0, t0) == 0;
         assert TupleComparator.comparePartialTuples(t2, t2) == 0;
 
-        // Empty tuple should always compare less than all other tuples.
+        // In "IGNORE_LENGTH" mode, empty tuple always compares equal to
+        // all other tuples.
 
-        assert TupleComparator.comparePartialTuples(t0, t2) < 0;
-        assert TupleComparator.comparePartialTuples(t2, t0) > 0;
+        assert TupleComparator.comparePartialTuples(t0, t2) == 0;
+        assert TupleComparator.comparePartialTuples(t2, t0) == 0;
 
-        assert TupleComparator.comparePartialTuples(t0, t1) < 0;
-        assert TupleComparator.comparePartialTuples(t1, t0) > 0;
+        assert TupleComparator.comparePartialTuples(t0, t1) == 0;
+        assert TupleComparator.comparePartialTuples(t1, t0) == 0;
 
-        assert TupleComparator.comparePartialTuples(t0, t5) < 0;
-        assert TupleComparator.comparePartialTuples(t5, t0) > 0;
+        assert TupleComparator.comparePartialTuples(t0, t5) == 0;
+        assert TupleComparator.comparePartialTuples(t5, t0) == 0;
 
         // Compare tuples of different lengths - only the common columns
         // should be used.
@@ -328,6 +329,54 @@ public class TestTupleComparator {
     }
 
 
+    public void testComparePartialTuplesShorterIsLess() {
+        TupleLiteral t0 = new TupleLiteral();
+        TupleLiteral t1 = new TupleLiteral(new Object[]{null});
+        TupleLiteral t2 = new TupleLiteral(1);
+        TupleLiteral t3 = new TupleLiteral(1, 2);
+        TupleLiteral t5 = new TupleLiteral(1, 2, 2);
+        TupleLiteral t7 = new TupleLiteral(1, null, 3);
+        TupleLiteral t8 = new TupleLiteral(1, 2, null);
+
+        // Local alias for the comparison mode, to keep the calls below short.
+        final TupleComparator.CompareMode mode =
+            TupleComparator.CompareMode.SHORTER_IS_LESS;
+
+        assert TupleComparator.comparePartialTuples(t0, t0, mode) == 0;
+        assert TupleComparator.comparePartialTuples(t2, t2, mode) == 0;
+
+        // Empty tuple should always compare less than all other tuples.
+
+        assert TupleComparator.comparePartialTuples(t0, t2, mode) < 0;
+        assert TupleComparator.comparePartialTuples(t2, t0, mode) > 0;
+
+        assert TupleComparator.comparePartialTuples(t0, t1, mode) < 0;
+        assert TupleComparator.comparePartialTuples(t1, t0, mode) > 0;
+
+        assert TupleComparator.comparePartialTuples(t0, t5, mode) < 0;
+        assert TupleComparator.comparePartialTuples(t5, t0, mode) > 0;
+
+        // Compare tuples of different lengths - the common columns should be
+        // used, but if all common columns are equal, the shorter tuple is
+        // still considered to be "less than" the longer tuple.
+
+        assert TupleComparator.comparePartialTuples(t2, t3, mode) < 0;
+        assert TupleComparator.comparePartialTuples(t3, t2, mode) > 0;
+
+        assert TupleComparator.comparePartialTuples(t3, t5, mode) < 0;
+        assert TupleComparator.comparePartialTuples(t5, t3, mode) > 0;
+
+        assert TupleComparator.comparePartialTuples(t2, t5, mode) < 0;
+        assert TupleComparator.comparePartialTuples(t5, t2, mode) > 0;
+
+        assert TupleComparator.comparePartialTuples(t2, t7, mode) < 0;
+        assert TupleComparator.comparePartialTuples(t7, t2, mode) > 0;
+
+        assert TupleComparator.comparePartialTuples(t3, t8, mode) < 0;
+        assert TupleComparator.comparePartialTuples(t8, t3, mode) > 0;
+    }
+
+
     public void testComparePartialTuplesExceptions() {
         TupleLiteral t = new TupleLiteral(1, 2, 3);
 
-- 
GitLab