1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
package edu.caltech.nanodb.functions;
import java.util.ArrayList;
import java.util.List;
import edu.caltech.nanodb.expressions.ArithmeticOperator;
import edu.caltech.nanodb.expressions.Expression;
import edu.caltech.nanodb.relations.ColumnType;
import edu.caltech.nanodb.relations.Schema;
/**
 * Aggregate function that reports either the variance or the standard
 * deviation of the non-<tt>null</tt> values fed to it.  Which of the two is
 * produced is fixed at construction time.
 * <p>
 * Every incoming value is retained until the result is requested, because
 * the residuals can only be computed once the mean is known.  The variance is
 * obtained by dividing the sum of squared residuals by the number of values
 * (i.e. by N, not N - 1).
 */
public class StdDevVarAggregate extends AggregateFunction {

    /** When true the result is the standard deviation; otherwise the variance. */
    private final boolean computeStdDev;

    /** Running total of all non-null inputs, or null if none seen yet. */
    private Object runningSum;

    /** All non-null inputs in arrival order, or null if none seen yet. */
    private List<Object> samples;


    /**
     * Creates the aggregate.
     *
     * @param computeStdDev <tt>true</tt> to produce the standard deviation,
     *        <tt>false</tt> to produce the variance
     */
    public StdDevVarAggregate(boolean computeStdDev) {
        // This aggregate does not support the DISTINCT modifier.
        super(/* supportsDistinct */ false);
        this.computeStdDev = computeStdDev;
    }


    /** Discards all accumulated state so the aggregate can be reused. */
    @Override
    public void clearResult() {
        samples = null;
        runningSum = null;
    }


    /**
     * Records one input value.  A <tt>null</tt> input is ignored entirely.
     *
     * @param value the value to fold into the aggregate; may be <tt>null</tt>
     */
    @Override
    public void addValue(Object value) {
        if (value == null) {
            return;
        }

        // The backing list is created lazily on the first non-null input.
        if (samples == null) {
            samples = new ArrayList<Object>();
        }
        samples.add(value);

        // The first value seeds the running total; later ones are added in.
        runningSum = (runningSum == null)
            ? value
            : ArithmeticOperator.evalObjects(ArithmeticOperator.Type.ADD,
                runningSum, value);
    }


    /**
     * Computes the aggregate over everything recorded so far.
     *
     * @return the variance or standard deviation (depending on how this
     *         object was constructed), or <tt>null</tt> if no non-null value
     *         has been added since the last reset
     */
    @Override
    public Object getResult() {
        if (samples == null || runningSum == null) {
            return null;
        }

        // TODO: Need to generate NUMERIC result. Using double right now.
        double n = (double) samples.size();

        // Mean = total / number of values.
        Object mean = ArithmeticOperator.evalObjects(
            ArithmeticOperator.Type.DIVIDE, runningSum, n);

        // Accumulate the squared residuals, starting from the first sample
        // and then folding in the remaining ones in arrival order.
        Object residualTotal = squareDifference(samples.get(0), mean);
        for (Object sample : samples.subList(1, samples.size())) {
            residualTotal = ArithmeticOperator.evalObjects(
                ArithmeticOperator.Type.ADD, residualTotal,
                squareDifference(sample, mean));
        }

        // Variance = sum of squared residuals / number of values.
        Object variance = ArithmeticOperator.evalObjects(
            ArithmeticOperator.Type.DIVIDE, residualTotal, n);

        if (!computeStdDev) {
            return variance;
        }

        // Standard deviation is the variance raised to the power one half.
        return ArithmeticOperator.evalObjects(
            ArithmeticOperator.Type.POWER, variance, Double.valueOf(.5));
    }


    /**
     * Reports the column type of this aggregate's result, which is taken to
     * be the type of its single argument expression.
     *
     * @param args the argument expressions of the function call
     * @param schema the schema the argument is evaluated against
     * @return the column type of the sole argument
     * @throws IllegalArgumentException if the call does not have exactly one
     *         argument
     */
    @Override
    public ColumnType getReturnType(List<Expression> args, Schema schema) {
        int argCount = args.size();
        if (argCount == 1) {
            // NOTE(review): getResult() divides by a double count, so the
            // computed value may be floating-point even when the argument
            // column is not -- confirm that reporting the argument's own
            // type is what callers expect.
            return args.get(0).getColumnInfo(schema).getType();
        }

        throw new IllegalArgumentException(
            "Stddev/variance aggregate function takes 1 argument; got " +
            argCount);
    }


    /**
     * Computes <tt>(value - avg)</tt> squared.
     *
     * @param value the sample
     * @param avg the mean to subtract from the sample
     * @return the squared difference of the two operands
     */
    private Object squareDifference(Object value, Object avg) {
        Object residual = ArithmeticOperator.evalObjects(
            ArithmeticOperator.Type.SUBTRACT, value, avg);

        return ArithmeticOperator.evalObjects(
            ArithmeticOperator.Type.POWER, residual, Integer.valueOf(2));
    }
}