001 /* 002 * JBoss DNA (http://www.jboss.org/dna) 003 * See the COPYRIGHT.txt file distributed with this work for information 004 * regarding copyright ownership. Some portions may be licensed 005 * to Red Hat, Inc. under one or more contributor license agreements. 006 * See the AUTHORS.txt file in the distribution for a full listing of 007 * individual contributors. 008 * 009 * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA 010 * is licensed to you under the terms of the GNU Lesser General Public License as 011 * published by the Free Software Foundation; either version 2.1 of 012 * the License, or (at your option) any later version. 013 * 014 * JBoss DNA is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 017 * Lesser General Public License for more details. 018 * 019 * You should have received a copy of the GNU Lesser General Public 020 * License along with this software; if not, write to the Free 021 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 022 * 02110-1301 USA, or see the FSF site: http://www.fsf.org. 023 */ 024 package org.jboss.dna.common.statistic; 025 026 import java.math.BigDecimal; 027 import java.text.DecimalFormat; 028 import java.util.ArrayList; 029 import java.util.Arrays; 030 import java.util.Collections; 031 import java.util.Iterator; 032 import java.util.LinkedList; 033 import java.util.List; 034 import org.jboss.dna.common.math.MathOperations; 035 import org.jboss.dna.common.util.StringUtil; 036 037 public class Histogram<T extends Number> { 038 039 public static final int DEFAULT_BUCKET_COUNT = 10; 040 public static final int DEFAULT_SIGNIFICANT_FIGURES = 4; 041 042 protected final MathOperations<T> math; 043 protected final List<T> values; 044 private int bucketCount = DEFAULT_BUCKET_COUNT; 045 private int significantFigures = DEFAULT_SIGNIFICANT_FIGURES; 046 private BigDecimal bucketWidth; 047 private LinkedList<Bucket> buckets; 048 private BucketingStrategy actualValueStrategy = new DefaultBucketingStrategy(); 049 private BucketingStrategy bucketingStrategy = actualValueStrategy; 050 051 public Histogram( MathOperations<T> operations, List<T> values ) { 052 this.math = operations; 053 this.values = new LinkedList<T>(values); 054 this.buckets = new LinkedList<Bucket>(); 055 this.bucketWidth = null; 056 // Sort the data using natural order ... 057 Collections.sort(this.values, this.math.getComparator()); 058 } 059 060 public Histogram( MathOperations<T> operations, T... values ) { 061 this(operations, Arrays.asList(values)); 062 } 063 064 public BucketingStrategy getStrategy() { 065 return this.bucketingStrategy; 066 } 067 068 /** 069 * @return math 070 */ 071 public MathOperations<T> getMathOperations() { 072 return this.math; 073 } 074 075 /** 076 * Set the histogram to use the standard deviation to determine the bucket sizes. 077 * @param median 078 * @param standardDeviation 079 * @param sigma 080 */ 081 public void setStrategy( double median, double standardDeviation, int sigma ) { 082 this.bucketingStrategy = new StandardDeviationBucketingStrategy(median, standardDeviation, sigma); 083 this.bucketWidth = null; 084 } 085 086 /** 087 * Set the histogram to use the supplied minimum and maximum values to determine the bucket size. 088 * @param minimum 089 * @param maximum 090 */ 091 public void setStrategy( T minimum, T maximum ) { 092 this.bucketingStrategy = new ExplicitBucketingStrategy(minimum, maximum); 093 this.bucketWidth = null; 094 } 095 096 /** 097 * Set the histogram to use the actual minimum and maximum values to determine the bucket sizes. 098 */ 099 public void setStrategyToDefault() { 100 this.bucketingStrategy = this.actualValueStrategy; 101 this.bucketWidth = null; 102 } 103 104 public int getSignificantFigures() { 105 return significantFigures; 106 } 107 108 /** 109 * Set the number of significant figures used in the calculation of the bucket widths. 110 * @param significantFigures the number of significant figures for the bucket widths 111 * @return this histogram, useful for method-chaining 112 * @see #DEFAULT_SIGNIFICANT_FIGURES 113 */ 114 public Histogram<T> setSignificantFigures( int significantFigures ) { 115 if (significantFigures != this.significantFigures) { 116 this.significantFigures = significantFigures; 117 this.bucketWidth = null; 118 this.buckets.clear(); 119 } 120 return this; 121 } 122 123 /** 124 * Return the number of buckets in this histogram. 125 * @return the number of buckets. 126 */ 127 public int getBucketCount() { 128 return bucketCount; 129 } 130 131 /** 132 * Set the number of buckets that this histogram will use. 133 * @param count the number of buckets 134 * @return this histogram, useful for method-chaining 135 * @see #DEFAULT_BUCKET_COUNT 136 */ 137 public Histogram<T> setBucketCount( int count ) { 138 if (count != this.bucketCount) { 139 this.bucketCount = count; 140 this.bucketWidth = null; 141 this.buckets.clear(); 142 } 143 return this; 144 } 145 146 /** 147 * Get the buckets in this histogram. If the histogram has not yet been computed, this method will cause it to be generated. 148 * The resulting list should not be modified. 149 * @return the histogram buckets. 150 */ 151 public List<Bucket> getBuckets() { 152 compute(); 153 return this.buckets; 154 } 155 156 protected void compute() { 157 // Only compute if there is not already a histogram ... 158 if (this.bucketWidth != null) return; 159 160 // Find the lower and upper bounds of the histogram using the strategy ... 161 T lowerBound = this.bucketingStrategy.getLowerBound(); 162 T upperBound = this.bucketingStrategy.getUpperBound(); 163 164 // Find the actual minimum and maximum values ... 165 T actualMinimum = this.actualValueStrategy.getLowerBound(); 166 T actualMaximum = this.actualValueStrategy.getUpperBound(); 167 168 // Create the buckets ... 169 List<T> boundaries = getBucketBoundaries(this.math, lowerBound, upperBound, actualMinimum, actualMaximum, this.bucketCount, this.significantFigures); 170 this.buckets.clear(); 171 int numBuckets = boundaries.isEmpty() ? 0 : boundaries.size() - 1; 172 for (int i = 0; i != numBuckets; ++i) { 173 this.buckets.add(new Bucket(boundaries.get(i), boundaries.get(i + 1))); 174 } 175 176 // Create the histogram by adding values to each range ... 177 Iterator<Bucket> intervalIterator = this.buckets.iterator(); 178 Bucket currentInterval = null; 179 for (T value : this.values) { 180 while (currentInterval == null || currentInterval.checkValue(value, !intervalIterator.hasNext()) > 0) { 181 if (!intervalIterator.hasNext()) break; 182 currentInterval = intervalIterator.next(); 183 } 184 if (currentInterval != null) currentInterval.addValue(value); 185 } 186 } 187 188 /** 189 * Return the total number of values that have gone into this histogram. 190 * @return the total number of values 191 * @see Bucket#getPercentageOfValues() 192 */ 193 public long getTotalNumberOfValues() { 194 return this.values.size(); 195 } 196 197 protected float getMaximumPercentage() { 198 float maxPercentage = 0.0f; 199 for (Bucket bucket : this.buckets) { 200 maxPercentage = Math.max(maxPercentage, bucket.getPercentageOfValues()); 201 } 202 return maxPercentage; 203 } 204 205 protected long getMaximumCount() { 206 long maxCount = 0l; 207 for (Bucket bucket : this.buckets) { 208 maxCount = Math.max(maxCount, bucket.getNumberOfValues()); 209 } 210 return maxCount; 211 } 212 213 /** 214 * Generate a textual (horizontal) bar graph of this histogram. 215 * @param maxBarLength the maximum bar length, or 0 if the bar length is to represent actual counts 216 * @return the strings that make up the histogram 217 */ 218 public List<String> getTextGraph( int maxBarLength ) { 219 compute(); 220 if (maxBarLength < 1) maxBarLength = (int)this.getMaximumCount(); 221 final float barLengthForHundredPercent = this.buckets.isEmpty() ? maxBarLength : 100.0f * maxBarLength / getMaximumPercentage(); 222 final String fullLengthBar = StringUtil.createString('*', (int)barLengthForHundredPercent); 223 List<String> result = new LinkedList<String>(); 224 // First calculate the labels and the max length ... 225 int maxLowerBoundLength = 0; 226 int maxUpperBoundLength = 0; 227 for (Bucket bucket : this.buckets) { 228 maxLowerBoundLength = Math.max(bucket.getLowerBound().toString().length(), maxLowerBoundLength); 229 maxUpperBoundLength = Math.max(bucket.getUpperBound().toString().length(), maxUpperBoundLength); 230 } 231 232 // Create the header ... 233 int rangeWidth = 1 + maxLowerBoundLength + 3 + maxUpperBoundLength + 1; 234 int barWidth = maxBarLength + 20; 235 result.add(StringUtil.justifyLeft("Ranges", rangeWidth, ' ') + " Distribution"); 236 result.add(StringUtil.createString('-', rangeWidth) + ' ' + StringUtil.createString('-', barWidth)); 237 for (Bucket bucket : this.buckets) { 238 float percent = bucket.getPercentageOfValues(); 239 long number = bucket.getNumberOfValues(); 240 StringBuilder sb = new StringBuilder(); 241 sb.append("["); 242 sb.append(StringUtil.justifyLeft(bucket.getLowerBound().toString(), maxLowerBoundLength, ' ')); 243 sb.append(" - "); 244 sb.append(StringUtil.justifyLeft(bucket.getUpperBound().toString(), maxUpperBoundLength, ' ')); 245 sb.append("] "); 246 int barLength = Math.max((int)(barLengthForHundredPercent * percent / 100.0f), 0); 247 if (barLength == 0 && number != 0) barLength = 1; // make sure there is a bar for all non-zero buckets 248 sb.append(fullLengthBar.substring(0, barLength)); 249 if (number != 0) { 250 sb.append(" "); 251 sb.append(number); 252 sb.append(" ("); 253 sb.append(new DecimalFormat("###.#").format(percent)); 254 sb.append("%)"); 255 } 256 result.add(sb.toString()); 257 } 258 return result; 259 } 260 261 protected static <T> List<T> getBucketBoundaries( MathOperations<T> math, T lowerBound, T upperBound, T actualMinimum, T actualMaximum, int bucketCount, int bucketWidthSigFigs ) { 262 lowerBound = math.compare(lowerBound, actualMinimum) < 0 ? actualMinimum : lowerBound; 263 upperBound = math.compare(actualMaximum, upperBound) < 0 ? actualMaximum : upperBound; 264 if (math.compare(lowerBound, upperBound) == 0) { 265 List<T> boundaries = new ArrayList<T>(); 266 boundaries.add(lowerBound); 267 boundaries.add(upperBound); 268 return boundaries; 269 } 270 final boolean extraLowerBucketNeeded = math.compare(lowerBound, actualMinimum) > 0; 271 final boolean extraUpperBucketNeeded = math.compare(actualMaximum, upperBound) > 0; 272 if (extraLowerBucketNeeded) --bucketCount; 273 if (extraUpperBucketNeeded) --bucketCount; 274 275 // Compute the delta between the lower and upper bound ... 276 T totalWidth = math.subtract(upperBound, lowerBound); 277 int totalWidthScale = math.getExponentInScientificNotation(totalWidth); 278 279 // Modify the lower bound by rounding down to the next lower meaningful value, 280 // using the scale of the totalWidth to determine how to round down. 281 T roundedLowerBound = math.roundDown(lowerBound, -totalWidthScale); 282 T roundedUpperBound = math.roundUp(upperBound, -totalWidthScale); 283 284 // Create the ranges ... 285 double finalLowerBound = math.doubleValue(roundedLowerBound); 286 double finalUpperBound = math.doubleValue(roundedUpperBound); 287 double finalBucketCount = bucketCount; 288 double bucketWidth = (finalUpperBound - finalLowerBound) / finalBucketCount; 289 290 // DoubleOperations doubleOps = new DoubleOperations(); 291 // bucketWidth = doubleOps.keepSignificantFigures(bucketWidth,bucketWidthSigFigs); 292 293 List<T> boundaries = new ArrayList<T>(); 294 if (bucketWidth > 0.0d) { 295 if (extraLowerBucketNeeded) boundaries.add(actualMinimum); 296 double nextBoundary = finalLowerBound; 297 for (int i = 0; i != bucketCount; ++i) { 298 boundaries.add(math.create(nextBoundary)); 299 nextBoundary = nextBoundary + bucketWidth; 300 // nextBoundary = doubleOps.roundUp(nextBoundary + bucketWidth, bucketWidthSigFigs ); 301 } 302 boundaries.add(roundedUpperBound); 303 if (extraUpperBucketNeeded) boundaries.add(actualMaximum); 304 } 305 return boundaries; 306 } 307 308 /** 309 * Represents a bucket in a histogram. 310 */ 311 public class Bucket implements Comparable<Bucket> { 312 313 private final T lowerBound; 314 private final T upperBound; 315 private final T width; 316 private long numValues; 317 318 protected Bucket( T lowerBound, T upperBound ) { 319 this.lowerBound = lowerBound; 320 this.upperBound = upperBound; 321 this.width = Histogram.this.math.subtract(upperBound, lowerBound); 322 } 323 324 /** 325 * Get the lower bound of this bucket. 326 * @return the lower bound 327 */ 328 public T getLowerBound() { 329 return lowerBound; 330 } 331 332 /** 333 * Get the upper bound of this bucket. 334 * @return the upper bound 335 */ 336 public T getUpperBound() { 337 return upperBound; 338 } 339 340 /** 341 * Get the width of this bucket. 342 * @return the width 343 */ 344 public T getWidth() { 345 return this.width; 346 } 347 348 /** 349 * Return the percentage of values in the histogram that appear in this bucket. 350 * @return the percentage of all values in the histogram that appear in this bucket. 351 */ 352 public float getPercentageOfValues() { 353 float total = Histogram.this.getTotalNumberOfValues(); 354 if (total == 0.0f) return 0.0f; 355 float numValuesFloat = this.numValues; 356 return 100.0f * numValuesFloat / total; 357 } 358 359 /** 360 * Add a value to this bucket 361 * @param value 362 */ 363 protected void addValue( T value ) { 364 ++this.numValues; 365 } 366 367 /** 368 * Get the number of values in this bucket. 369 * @return the number of values 370 */ 371 public long getNumberOfValues() { 372 return this.numValues; 373 } 374 375 /** 376 * Check whether the value fits in this bucket. 377 * @param value the value to check 378 * @param isLast 379 * @return 0 if the value fits in this bucket, -1 if the value fits in a prior bucket, or 1 if the value fits in a later 380 * bucket 381 */ 382 public int checkValue( T value, boolean isLast ) { 383 if (Histogram.this.math.compare(this.lowerBound, value) > 0) return -1; 384 if (isLast) { 385 if (Histogram.this.math.compare(value, this.upperBound) > 0) return 1; 386 } else { 387 if (Histogram.this.math.compare(value, this.upperBound) >= 0) return 1; 388 } 389 return 0; 390 } 391 392 public int compareTo( Bucket that ) { 393 // This is lower if 'that' has a lowerBound that is greater than 'this' lower bound ... 394 if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) < 0) return -1; 395 if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) > 0) return 1; 396 // The lower bounds are the same, so 'this' is lower if 'that' has an upperBound that is greater than 'this' lower 397 // bound ... 398 if (Histogram.this.math.compare(this.upperBound, that.upperBound) < 0) return -1; 399 if (Histogram.this.math.compare(this.upperBound, that.upperBound) > 0) return 1; 400 return 0; 401 } 402 403 protected Class<T> getNumberClass() { 404 return Histogram.this.math.getOperandClass(); 405 } 406 407 @Override 408 public boolean equals( Object obj ) { 409 if (obj != null && obj.getClass() == this.getClass()) { 410 Bucket that = (Bucket)obj; 411 if (this.getNumberClass().isAssignableFrom(that.getNumberClass())) { 412 if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) != 0) return false; 413 if (Histogram.this.math.compare(this.upperBound, that.upperBound) != 0) return false; 414 if (Histogram.this.math.compare(this.width, that.width) != 0) return false; 415 return true; 416 } 417 } 418 return false; 419 } 420 421 @Override 422 public String toString() { 423 return "[" + this.lowerBound + "," + this.upperBound + ")"; 424 } 425 426 } 427 428 public abstract class BucketingStrategy { 429 430 public List<T> getValues() { 431 return Histogram.this.values; 432 } 433 434 public abstract T getLowerBound(); 435 436 public abstract T getUpperBound(); 437 } 438 439 public class DefaultBucketingStrategy extends BucketingStrategy { 440 441 @Override 442 public T getLowerBound() { 443 if (getValues().isEmpty()) return Histogram.this.math.createZeroValue(); 444 return getValues().get(0); 445 } 446 447 @Override 448 public T getUpperBound() { 449 if (getValues().isEmpty()) return Histogram.this.math.createZeroValue(); 450 return getValues().get(getValues().size() - 1); 451 } 452 } 453 454 public class ExplicitBucketingStrategy extends BucketingStrategy { 455 456 private final T lowerBound; 457 private final T upperBound; 458 459 protected ExplicitBucketingStrategy( T lowerBound, T upperBound ) { 460 this.lowerBound = lowerBound; 461 this.upperBound = upperBound; 462 } 463 464 @Override 465 public T getLowerBound() { 466 return this.lowerBound; 467 } 468 469 @Override 470 public T getUpperBound() { 471 return this.upperBound; 472 } 473 } 474 475 public class StandardDeviationBucketingStrategy extends BucketingStrategy { 476 477 private final double median; 478 private final double standardDeviation; 479 private final int numberOfDeviationsAboveAndBelow; 480 481 protected StandardDeviationBucketingStrategy( double median, double standardDeviation, int numDeviationsAboveAndBelow ) { 482 this.median = median; 483 this.standardDeviation = Math.abs(standardDeviation); 484 this.numberOfDeviationsAboveAndBelow = Math.abs(numDeviationsAboveAndBelow); 485 } 486 487 @Override 488 public T getLowerBound() { 489 double lower = this.median - (standardDeviation * numberOfDeviationsAboveAndBelow); 490 return Histogram.this.math.create(lower); 491 } 492 493 @Override 494 public T getUpperBound() { 495 double upper = this.median + (standardDeviation * numberOfDeviationsAboveAndBelow); 496 return Histogram.this.math.create(upper); 497 } 498 } 499 500 }