1 /*
  2  * Copyright (c) 2012 LabKey Corporation
  3  *
  4  * Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0
  5  */
  6 
  7 /********** Stats **********/
  8 
  9 if(!LABKEY.vis.Stat){
 10     /**
 11      * @namespace The namespace used for statistics related functions.
 12      */
 13 	LABKEY.vis.Stat = {};
 14 }
 15 
 16 
 17 /**
 18  * Calculates a statistical summary of an array of data. The summary includes Quartiles 1, 2, 3, minimum, maximum and
 19  * the inner quartile range. It is used internally to create box plots.
 20  * @param {Array} data An array of data. Can be an array of any type of object.
 21  * @param {Function} accessor A function that is used to access the value of each item in the array.
 22  * @returns {Object} summary
 23  * @example
 24     var data = [],
 25         accessor,
 26         summary;
 27 
 28     // Let's generate some data.
 29     for (var i = 0; i < 500; i++){
 30         data.push(parseInt(Math.random() * 50));
 31     }
 32 
 33     // Let's define how we access the data.
 34     accessor = function(row){
 35         return row;
 36     }
 37 
 38     // Now we'll get a summary.
 39     summary = LABKEY.vis.Stat.summary(data, accessor);
 40 
 41     console.log(summary);
 42  *
 43  */
 44 LABKEY.vis.Stat.summary = function(data, accessor){
 45     /*
 46         Returns an object with the min, max, Q1, Q2 (median), Q3, interquartile range, and the sorted array of values.
 47      */
 48     var summary = {};
 49 
 50     summary.sortedValues = LABKEY.vis.Stat.sortNumericAscending(data, accessor);
 51     summary.min = summary.sortedValues[0];
 52     summary.max = summary.sortedValues[summary.sortedValues.length -1];
 53     summary.Q1 = LABKEY.vis.Stat.Q1(summary.sortedValues);
 54     summary.Q2 = LABKEY.vis.Stat.Q2(summary.sortedValues);
 55     summary.Q3 = LABKEY.vis.Stat.Q3(summary.sortedValues);
 56     summary.IQR = summary.Q3 - summary.Q1;
 57 
 58     return summary;
 59 };
 60 
 61 /**
 62  * Returns the 1st quartile for a sorted (asc) array.
 63  * @param numbers An array of numbers.
 64  * @returns {Number}
 65  */
 66 LABKEY.vis.Stat.Q1 = function(numbers){
 67     return d3.quantile(numbers,0.25);
 68 };
 69 
 70 /**
 71  * Returns the 2nd quartile (median) for a sorted (asc) array.
 72  * @param numbers An array of numbers.
 73  * @returns {Number}
 74  */
 75 LABKEY.vis.Stat.Q2 = function(numbers){
 76     return d3.quantile(numbers,0.5);
 77 };
 78 
 79 /**
 80  * An alias for {@link LABKEY.vis.Stat.Q2}
 81  */
 82 LABKEY.vis.Stat.MEDIAN = LABKEY.vis.Stat.Q2;
 83 
 84 /**
 85  * Returns the 3rd quartile for a sorted (asc) array.
 86  * @param numbers An array of numbers.
 87  * @returns {Number}
 88  */
 89 LABKEY.vis.Stat.Q3 = function(numbers){
 90     return d3.quantile(numbers,0.75);
 91 };
 92 
 93 /**
 94  * Returns the sum of the array.
 95  * @param numbers An array of numbers.
 96  * @returns {Number}
 97  */
 98 LABKEY.vis.Stat.SUM = function(numbers){
 99     return d3.sum(numbers);
100 };
101 
/**
 * Finds the smallest value in the array. Delegates to d3.min.
 * @param {Array} numbers An array of numbers.
 * @returns {Number} The minimum.
 */
LABKEY.vis.Stat.MIN = function(numbers){
    var smallest = d3.min(numbers);

    return smallest;
};
110 
/**
 * Finds the largest value in the array. Delegates to d3.max.
 * @param {Array} numbers An array of numbers.
 * @returns {Number} The maximum.
 */
LABKEY.vis.Stat.MAX = function(numbers){
    var largest = d3.max(numbers);

    return largest;
};
119 
/**
 * Extracts a value from every item of the data array and returns those values sorted from smallest to
 * largest. Values that are null or undefined are dropped. The result is a new array; the data array itself is
 * not reordered.
 * @param {Array} data An array of objects that have numeric values.
 * @param {Function} accessor A function that is called with one item and returns the numeric value to sort by.
 * @returns {Array} The extracted values in ascending order.
 */
LABKEY.vis.Stat.sortNumericAscending = function(data, accessor){
    var kept = [],
        candidate;

    for(var idx = 0; idx < data.length; idx++){
        candidate = accessor(data[idx]);
        if(candidate == null){
            // "== null" matches both null and undefined, which are the values to leave out.
            continue;
        }
        kept.push(candidate);
    }

    // Numeric comparator: the default sort would compare the values as strings.
    return kept.sort(function(left, right){
        return left - right;
    });
};
137 
/**
 * Extracts a value from every item of the data array and returns those values sorted from largest to
 * smallest. Values that are null or undefined are dropped. The result is a new array; the data array itself is
 * not reordered.
 * @param {Array} data An array of objects that have numeric values.
 * @param {Function} accessor A function that is called with one item and returns the numeric value to sort by.
 * @returns {Array} The extracted values in descending order.
 */
LABKEY.vis.Stat.sortNumericDescending = function(data, accessor){
    var kept = [],
        candidate;

    for(var idx = 0; idx < data.length; idx++){
        candidate = accessor(data[idx]);
        if(candidate == null){
            // "== null" matches both null and undefined, which are the values to leave out.
            continue;
        }
        kept.push(candidate);
    }

    // Numeric comparator with the operands swapped so that larger values come first.
    return kept.sort(function(left, right){
        return right - left;
    });
};
155 
/**
 * Executes a given function n times, passing in evenly spaced values that start at min and end at max, and
 * returns an array holding each input together with its result. Could be used to generate data to plot a curve
 * fit as part of a plot.
 * @param {Function} fn The function to be executed n times. The function must take one number as a parameter.
 * @param {Number} n The number of times to execute fn. Values that are missing or smaller than 2 are treated
 *      as 2, since at least two points are needed to make a line. Expected to be a whole number.
 * @param {Number} min The first value to pass to fn.
 * @param {Number} max The last value to pass to fn. May be smaller than min, in which case the values passed
 *      to fn descend from min to max.
 * @returns {Array} An array of objects of the form {x: value passed to fn, y: result of fn(x)}, in the order
 *      in which fn was called.
 */
LABKEY.vis.Stat.fn = function(fn, n, min, max){
    if(n === undefined || n === null || n < 2){
        // We need at least 2 points to make a line.
        n = 2;
    }

    var data = [],
        // Signed step: when min > max the samples must walk down towards max rather than away from it.
        stepSize = (max - min) / (n - 1),
        lastIndex = n - 1,
        x;

    for(var i = 0; i < n; i++){
        // Derive each x from its index instead of accumulating the step, so that rounding errors do not
        // build up along the range, and pin the final sample to max so that fn is never evaluated past it.
        x = (i === lastIndex) ? max : min + i * stepSize;
        data.push({x: x, y: fn(x)});
    }

    return data;
};
181 
/**
 * Computes the arithmetic mean of the values. Each value is divided by the number of values first and the
 * resulting shares are then added together.
 * @param {Array} values An array of numbers. Must not be null, undefined or empty.
 * @returns {Number} The average value.
 * @throws {String} "invalid input" if values is null, undefined or has no elements.
 */
LABKEY.vis.Stat.getMean = function(values)
{
    var isMissing = values == null;
    if (isMissing || values.length == 0)
    {
        throw "invalid input";
    }

    var count = values.length;
    var shares = values.map(function(value){
        return value / count;
    });

    // No initial value is needed: the guard above guarantees at least one element.
    return shares.reduce(function(runningTotal, share){
        return runningTotal + share;
    });
};

/**
 * An alias for LABKEY.vis.Stat.getMean
 */
LABKEY.vis.Stat.MEAN = LABKEY.vis.Stat.getMean;
198 
/**
 * Computes the standard deviation of the values as the square root of the mean of the squared differences
 * from the mean (i.e. the sum of squares is divided by the number of values, not by one less).
 * @param {Array} values An array of numbers. Must not be null, undefined or empty.
 * @returns {Number} The standard deviation.
 * @throws {String} "invalid input" if values is null or undefined, or (raised by LABKEY.vis.Stat.getMean) if
 *      values is empty.
 */
LABKEY.vis.Stat.getStdDev = function(values)
{
    if (values == null)
    {
        throw "invalid input";
    }

    var Stat = LABKEY.vis.Stat,
        center = Stat.getMean(values),
        squaredDeviations = values.map(function(sample){
            var deviation = sample - center;
            return deviation * deviation;
        });

    return Math.sqrt(Stat.getMean(squaredDeviations));
};
216 
// CUSUM_WEIGHT_FACTOR of 0.5 and CUSUM_CONTROL_LIMIT of 5 to achieve a 3*stdDev boundary
// CUSUM_WEIGHT_FACTOR is the allowance subtracted from every standardized value in getCUSUM.
LABKEY.vis.Stat.CUSUM_WEIGHT_FACTOR = 0.5;
// The two control limits are not read anywhere in this file; presumably they are used by the plotting code
// that draws the CUSUM chart boundaries.
LABKEY.vis.Stat.CUSUM_CONTROL_LIMIT = 5;
LABKEY.vis.Stat.CUSUM_CONTROL_LIMIT_LOWER = 0;
// Default lower bound applied to the results of getCUSUM when forcePositiveResult is true.
LABKEY.vis.Stat.CUSUM_EPSILON = 0.0000001;

/**
 * Calculates a variety of cumulative sums for a data array. Every value is standardized against the mean and
 * standard deviation of the whole array, and the running sum is never allowed to drop below 0.
 * @param values Array of data values to calculate from
 * @param negative True to calculate CUSUM-, false to calculate CUSUM+. (default to false)
 * @param transform True to calculate CUSUMv (Variability CUSUM), false to calculate CUSUMm (Mean CUSUM). (default to false)
 * @param forcePositiveResult True to force all result values to be no less than a specified positive value, usually used for log scale. (default to false)
 * @param epsilon The smallest value that all returned value can be, only used if forcePositiveResult is true. (default to LABKEY.vis.Stat.CUSUM_EPSILON)
 * @returns {number[]} One cumulative sum per input value, in input order. An empty array if values is null,
 *      undefined or has fewer than 2 elements. All sums are 0 (before the optional lower bound is applied) if
 *      every input value is the same.
 */
LABKEY.vis.Stat.getCUSUM = function(values, negative, transform, forcePositiveResult, epsilon)
{
    if (values == null || values.length < 2)
        return [];

    // Detect constant input by direct comparison. Relying on the standard deviation alone is not enough: the
    // mean is computed in floating point and can differ from the (identical) values by a rounding error, which
    // yields a tiny non-zero standard deviation and meaningless standardized values.
    var isConstant = true;
    for (var k = 1; k < values.length; k++)
    {
        if (values[k] !== values[0])
        {
            isConstant = false;
            break;
        }
    }

    var mean = LABKEY.vis.Stat.getMean(values);
    var stdDev = LABKEY.vis.Stat.getStdDev(values);
    var cusums = [];

    if (isConstant || stdDev == 0) // in the case when all values are equal, calculation has to abort, special case CUSUM to all be 0
    {
        for (var z = 0; z < values.length; z++)
            cusums.push(0);
    }
    else
    {
        var previous = 0; // the cumulative sum starts from 0
        for (var i = 0; i < values.length; i++)
        {
            var standardized = (values[i] - mean) / stdDev; //standard value (z-score)
            if (transform)
                standardized = (Math.sqrt(Math.abs(standardized)) - 0.822) / 0.349; //the transformed standardize normal quantity value so that it is sensitive to variability changes
            if (negative)
                standardized = standardized * -1;
            // Subtract the allowance, add the previous sum and reset to 0 whenever the sum would go negative.
            previous = Math.max(0, standardized - LABKEY.vis.Stat.CUSUM_WEIGHT_FACTOR + previous);
            cusums.push(previous);
        }
    }

    // Applied to the constant-input case as well, so that a log scale never receives a 0.
    if (forcePositiveResult)
    {
        var lowerBound = epsilon ? epsilon : LABKEY.vis.Stat.CUSUM_EPSILON;
        for (var j = 0; j < cusums.length; j++)
        {
            cusums[j] = Math.max(cusums[j], lowerBound);
        }
    }
    return cusums;
};
267 
// MOVING_RANGE_UPPER_LIMIT_WEIGHT is chosen to provide a type I error rate of 0.0027 which guarantees 3*stdDev
LABKEY.vis.Stat.MOVING_RANGE_UPPER_LIMIT_WEIGHT = 3.268;
LABKEY.vis.Stat.MOVING_RANGE_LOWER_LIMIT = 0;
// Default lower bound applied to the results of getMovingRanges when forcePositiveResult is true.
LABKEY.vis.Stat.MOVING_RANGE_EPSILON = 0.0000001;

/**
 * Computes the moving range of a data array: for every value, the absolute difference to the value directly
 * before it. The first value has no predecessor, so its moving range is 0.
 * @param values Array of data values to calculate from
 * @param forcePositiveResult True to force all result values to be no less than a specified positive value, usually used for log scale. (default to false)
 * @param epsilon The smallest value that all returned value can be, only used if forcePositiveResult is true. (default to LABKEY.vis.Stat.MOVING_RANGE_EPSILON)
 * @returns {number[]} One moving range per input value, in input order. An empty array if values is null,
 *      undefined or empty.
 */
LABKEY.vis.Stat.getMovingRanges = function(values, forcePositiveResult, epsilon)
{
    if (values == null || values.length < 1)
    {
        return [];
    }

    var ranges = [0]; // the first entry is always 0
    for (var idx = 1; idx < values.length; idx++)
    {
        ranges.push(Math.abs(values[idx] - values[idx - 1]));
    }

    if (!forcePositiveResult)
    {
        return ranges;
    }

    // A falsy epsilon (missing or 0) falls back to the default lower bound.
    var floor = epsilon || LABKEY.vis.Stat.MOVING_RANGE_EPSILON;
    return ranges.map(function(range){
        return Math.max(floor, range);
    });
};
299 
300