/*
 * Copyright (c) 2012 LabKey Corporation
 *
 * Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0
 */

/********** Stats **********/

if(!LABKEY.vis.Stat){
    /**
     * @namespace The namespace used for statistics related functions.
     */
    LABKEY.vis.Stat = {};
}


/**
 * Calculates a statistical summary of an array of data. The summary includes quartiles 1, 2 and 3, the minimum, the
 * maximum, the interquartile range (IQR) and the sorted array of values. It is used internally to create box plots.
 * @param {Array} data An array of data. Can be an array of any type of object.
 * @param {Function} accessor A function that is used to access the value of each item in the array.
 * @returns {Object} summary An object with the properties sortedValues, min, max, Q1, Q2, Q3 and IQR.
 * @example
    var data = [],
        accessor,
        summary;

    // Let's generate some data.
    for (var i = 0; i < 500; i++){
        data.push(Math.floor(Math.random() * 50));
    }

    // Let's define how we access the data.
    accessor = function(row){
        return row;
    }

    // Now we'll get a summary.
    summary = LABKEY.vis.Stat.summary(data, accessor);

    console.log(summary);
 *
 */
LABKEY.vis.Stat.summary = function(data, accessor){
    var sortedValues = LABKEY.vis.Stat.sortNumericAscending(data, accessor),
        summary = {};

    summary.sortedValues = sortedValues;
    // The values are sorted ascending, so the extremes sit at both ends of the array.
    summary.min = sortedValues[0];
    summary.max = sortedValues[sortedValues.length - 1];
    summary.Q1 = LABKEY.vis.Stat.Q1(sortedValues);
    summary.Q2 = LABKEY.vis.Stat.Q2(sortedValues);
    summary.Q3 = LABKEY.vis.Stat.Q3(sortedValues);
    summary.IQR = summary.Q3 - summary.Q1;

    return summary;
};

/**
 * Returns the 1st quartile for a sorted (asc) array.
 * @param numbers An array of numbers.
 * @returns {Number}
 */
LABKEY.vis.Stat.Q1 = function(numbers){
    return d3.quantile(numbers, 0.25);
};

/**
 * Returns the 2nd quartile (median) for a sorted (asc) array.
 * @param numbers An array of numbers.
 * @returns {Number}
 */
LABKEY.vis.Stat.Q2 = function(numbers){
    return d3.quantile(numbers, 0.5);
};

/**
 * An alias for {@link LABKEY.vis.Stat.Q2}
 */
LABKEY.vis.Stat.MEDIAN = LABKEY.vis.Stat.Q2;

/**
 * Returns the 3rd quartile for a sorted (asc) array.
 * @param numbers An array of numbers.
 * @returns {Number}
 */
LABKEY.vis.Stat.Q3 = function(numbers){
    return d3.quantile(numbers, 0.75);
};

/**
 * Returns the sum of the array.
 * @param numbers An array of numbers.
 * @returns {Number}
 */
LABKEY.vis.Stat.SUM = function(numbers){
    return d3.sum(numbers);
};

/**
 * Returns the minimum of the array.
 * @param numbers An array of numbers.
 * @returns {Number}
 */
LABKEY.vis.Stat.MIN = function(numbers){
    return d3.min(numbers);
};

/**
 * Returns the maximum of the array.
 * @param numbers An array of numbers.
 * @returns {Number}
 */
LABKEY.vis.Stat.MAX = function(numbers){
    return d3.max(numbers);
};

/**
 * Sorts an array of data in ascending order. Removes null/undefined values.
 * @param {Array} data An array of objects that have numeric values.
 * @param {Function} accessor A function used to access the numeric value that needs to be sorted.
 * @returns {Array} A new array holding the accessed values in ascending order; data itself is not modified.
 */
LABKEY.vis.Stat.sortNumericAscending = function(data, accessor){
    var numbers = [];
    for(var i = 0; i < data.length; i++){
        var value = accessor(data[i]);
        if(value !== null && value !== undefined){
            numbers.push(value);
        }
    }
    // An explicit comparator is required: the default sort compares values as strings.
    numbers.sort(function(a, b){return a - b;});
    return numbers;
};

/**
 * Sorts an array of data in descending order. Removes null/undefined values.
 * @param {Array} data An array of objects that have numeric values.
 * @param {Function} accessor A function used to access the numeric value that needs to be sorted.
 * @returns {Array} A new array holding the accessed values in descending order; data itself is not modified.
 */
LABKEY.vis.Stat.sortNumericDescending = function(data, accessor){
    var numbers = [];
    for(var i = 0; i < data.length; i++){
        var value = accessor(data[i]);
        if(value !== null && value !== undefined){
            numbers.push(value);
        }
    }
    // An explicit comparator is required: the default sort compares values as strings.
    numbers.sort(function(a, b){return b - a;});
    return numbers;
};

/**
 * Executes a given function n times passing in evenly spaced values starting at min and returns an array of each
 * result. Could be used to generate data to plot a curve fit as part of a plot.
 * @param {Function} fn The function to be executed n times. The function must take one number as a parameter.
 * @param {Number} n The number of times to execute fn. Values that are missing or less than 2 are treated as 2.
 * @param {Number} min The minimum value to pass to fn.
 * @param {Number} max The maximum value to pass to fn.
 * @returns {Array} An array of {x, y} objects, where x is the value passed to fn and y is the value fn returned.
 */
LABKEY.vis.Stat.fn = function(fn, n, min, max){
    if(n === undefined || n === null || n < 2){
        // We need at least 2 points to make a line.
        n = 2;
    }

    var data = [],
        stepSize = Math.abs((max - min) / (n - 1));

    for(var i = 0; i < n; i++){
        // Derive each x from the index instead of repeatedly adding stepSize, so that rounding error does not
        // accumulate across iterations and push the last point away from max.
        var x = min + i * stepSize;
        data.push({x: x, y: fn(x)});
    }

    return data;
};

/**
 * Returns the average value.
 * @param values An array of numbers.
 * @returns {Number}
 * @throws {String} "invalid input" if values is null, undefined or empty.
 */
LABKEY.vis.Stat.getMean = function(values)
{
    if (values == null || values.length == 0)
        throw "invalid input";

    // Sum first and divide once. Dividing every element by the length before adding rounds once per element
    // (six 1s averaged that way come out as 0.9999999999999999). Number() keeps the numeric coercion that the
    // per-element division used to perform.
    var total = values.reduce(function(sum, value){
        return sum + Number(value);
    }, 0);
    return total / values.length;
};

/**
 * An alias for LABKEY.vis.Stat.getMean
 */
LABKEY.vis.Stat.MEAN = LABKEY.vis.Stat.getMean;

/**
 * Returns the standard deviation, computed as the square root of the mean squared difference from the mean
 * (i.e. the population form, dividing by the number of values).
 * @param values An array of numbers.
 * @returns {Number}
 * @throws {String} "invalid input" if values is null, undefined or empty.
 */
LABKEY.vis.Stat.getStdDev = function(values)
{
    if (values == null)
        throw "invalid input";

    var mean = LABKEY.vis.Stat.getMean(values);
    var squareDiffs = values.map(function(value){
        var diff = value - mean;
        return diff * diff;
    });
    return Math.sqrt(LABKEY.vis.Stat.getMean(squareDiffs));
};

// CUSUM_WEIGHT_FACTOR of 0.5 and CUSUM_CONTROL_LIMIT of 5 to achieve a 3*stdDev boundary
LABKEY.vis.Stat.CUSUM_WEIGHT_FACTOR = 0.5;
LABKEY.vis.Stat.CUSUM_CONTROL_LIMIT = 5;
LABKEY.vis.Stat.CUSUM_CONTROL_LIMIT_LOWER = 0;
LABKEY.vis.Stat.CUSUM_EPSILON = 0.0000001;

/**
 * Calculates a variety of cumulative sums for a data array.
 * @param values Array of data values to calculate from
 * @param negative True to calculate CUSUM-, false to calculate CUSUM+. (default to false)
 * @param transform True to calculate CUSUMv (Variability CUSUM), false to calculate CUSUMm (Mean CUSUM). (default to false)
 * @param forcePositiveResult True to force all result values to be no less than a specified positive value, usually used for log scale. (default to false)
 * @param epsilon The smallest value that all returned value can be, only used if forcePositiveResult is true. (default to LABKEY.vis.Stat.CUSUM_EPSILON)
 * @returns {number[]} One CUSUM value per input value, or an empty array if fewer than two values are given.
 */
LABKEY.vis.Stat.getCUSUM = function(values, negative, transform, forcePositiveResult, epsilon)
{
    if (values == null || values.length < 2)
        return [];

    var cusums = [];
    var firstValue = values[0];
    // Compare the values directly: rounding in the mean can leave a tiny non-zero standard deviation for
    // identical values, which would standardize every point to about +/-1 and produce a bogus, growing CUSUM.
    var allEqual = values.every(function(value){
        return value === firstValue;
    });
    var mean = allEqual ? firstValue : LABKEY.vis.Stat.getMean(values);
    var stdDev = allEqual ? 0 : LABKEY.vis.Stat.getStdDev(values);

    if (stdDev == 0) // in the case when all values are equal, calculation has to abort, special case CUSUM to all be 0
    {
        for (var k = 0; k < values.length; k++)
            cusums.push(0);
    }
    else
    {
        var previous = 0; // the running sum starts from 0
        for (var i = 0; i < values.length; i++)
        {
            var standardized = (values[i] - mean) / stdDev; //standard value (z-score)
            if (transform)
                standardized = (Math.sqrt(Math.abs(standardized)) - 0.822) / 0.349; //the transformed standardize normal quantity value so that it is sensitive to variability changes
            if (negative)
                standardized = standardized * -1;
            previous = Math.max(0, standardized - LABKEY.vis.Stat.CUSUM_WEIGHT_FACTOR + previous);
            cusums.push(previous);
        }
    }

    // The lower bound is applied to the all-equal case as well, so that a caller plotting on a log scale never
    // receives zeros.
    if (forcePositiveResult)
    {
        var lowerBound = epsilon ? epsilon : LABKEY.vis.Stat.CUSUM_EPSILON;
        for (var j = 0; j < cusums.length; j++)
        {
            cusums[j] = Math.max(cusums[j], lowerBound);
        }
    }
    return cusums;
};

// MOVING_RANGE_UPPER_LIMIT_WEIGHT is chosen to provide a type I error rate of 0.0027 which guarantees 3*stdDev
LABKEY.vis.Stat.MOVING_RANGE_UPPER_LIMIT_WEIGHT = 3.268;
LABKEY.vis.Stat.MOVING_RANGE_LOWER_LIMIT = 0;
LABKEY.vis.Stat.MOVING_RANGE_EPSILON = 0.0000001;

/**
 * Calculate the moving range values for a data array, which are sequential differences between two successive values.
 * @param values Array of data values to calculate from
 * @param forcePositiveResult True to force all result values to be no less than a specified positive value, usually used for log scale. (default to false)
 * @param epsilon The smallest value that all returned value can be, only used if forcePositiveResult is true. (default to LABKEY.vis.Stat.MOVING_RANGE_EPSILON)
 * @returns {number[]} One moving range per input value (the first is always 0), or an empty array if values is null, undefined or empty.
 */
LABKEY.vis.Stat.getMovingRanges = function(values, forcePositiveResult, epsilon)
{
    if (values == null || values.length < 1)
        return [];

    var mR = [0]; //mR[0] is always 0
    for (var i = 1; i < values.length; i++)
    {
        mR.push(Math.abs(values[i] - values[i-1]));
    }

    if (forcePositiveResult)
    {
        var lowerBound = epsilon ? epsilon : LABKEY.vis.Stat.MOVING_RANGE_EPSILON;
        for (var j = 0; j < mR.length; j++)
        {
            mR[j] = Math.max(lowerBound, mR[j]);
        }
    }
    return mR;
};