001 /*
002 * JBoss, Home of Professional Open Source.
003 * Copyright 2008, Red Hat Middleware LLC, and individual contributors
004 * as indicated by the @author tags. See the copyright.txt file in the
005 * distribution for a full listing of individual contributors.
006 *
007 * This is free software; you can redistribute it and/or modify it
008 * under the terms of the GNU Lesser General Public License as
009 * published by the Free Software Foundation; either version 2.1 of
010 * the License, or (at your option) any later version.
011 *
012 * This software is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * You should have received a copy of the GNU Lesser General Public
018 * License along with this software; if not, write to the Free
019 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
021 */
022 package org.jboss.dna.common.stats;
023
024 import java.math.BigDecimal;
025 import java.text.DecimalFormat;
026 import java.util.ArrayList;
027 import java.util.Arrays;
028 import java.util.Collections;
029 import java.util.Iterator;
030 import java.util.LinkedList;
031 import java.util.List;
032 import org.jboss.dna.common.math.MathOperations;
033 import org.jboss.dna.common.util.StringUtil;
034
035 public class Histogram<T extends Number> {
036
037 public static final int DEFAULT_BUCKET_COUNT = 10;
038 public static final int DEFAULT_SIGNIFICANT_FIGURES = 4;
039
040 protected final MathOperations<T> math;
041 protected final List<T> values;
042 private int bucketCount = DEFAULT_BUCKET_COUNT;
043 private int significantFigures = DEFAULT_SIGNIFICANT_FIGURES;
044 private BigDecimal bucketWidth;
045 private LinkedList<Bucket> buckets;
046 private BucketingStrategy actualValueStrategy = new DefaultBucketingStrategy();
047 private BucketingStrategy bucketingStrategy = actualValueStrategy;
048
049 public Histogram( MathOperations<T> operations, List<T> values ) {
050 this.math = operations;
051 this.values = new LinkedList<T>(values);
052 this.buckets = new LinkedList<Bucket>();
053 this.bucketWidth = null;
054 // Sort the data using natural order ...
055 Collections.sort(this.values, this.math.getComparator());
056 }
057
058 public Histogram( MathOperations<T> operations, T... values ) {
059 this(operations, Arrays.asList(values));
060 }
061
062 public BucketingStrategy getStrategy() {
063 return this.bucketingStrategy;
064 }
065
066 /**
067 * @return math
068 */
069 public MathOperations<T> getMathOperations() {
070 return this.math;
071 }
072
073 /**
074 * Set the histogram to use the standard deviation to determine the bucket sizes.
075 * @param median
076 * @param standardDeviation
077 * @param sigma
078 */
079 public void setStrategy( double median, double standardDeviation, int sigma ) {
080 this.bucketingStrategy = new StandardDeviationBucketingStrategy(median, standardDeviation, sigma);
081 this.bucketWidth = null;
082 }
083
084 /**
085 * Set the histogram to use the supplied minimum and maximum values to determine the bucket size.
086 * @param minimum
087 * @param maximum
088 */
089 public void setStrategy( T minimum, T maximum ) {
090 this.bucketingStrategy = new ExplicitBucketingStrategy(minimum, maximum);
091 this.bucketWidth = null;
092 }
093
094 /**
095 * Set the histogram to use the actual minimum and maximum values to determine the bucket sizes.
096 */
097 public void setStrategyToDefault() {
098 this.bucketingStrategy = this.actualValueStrategy;
099 this.bucketWidth = null;
100 }
101
102 public int getSignificantFigures() {
103 return significantFigures;
104 }
105
106 /**
107 * Set the number of significant figures used in the calculation of the bucket widths.
108 * @param significantFigures the number of significant figures for the bucket widths
109 * @return this histogram, useful for method-chaining
110 * @see #DEFAULT_SIGNIFICANT_FIGURES
111 */
112 public Histogram<T> setSignificantFigures( int significantFigures ) {
113 if (significantFigures != this.significantFigures) {
114 this.significantFigures = significantFigures;
115 this.bucketWidth = null;
116 this.buckets.clear();
117 }
118 return this;
119 }
120
121 /**
122 * Return the number of buckets in this histogram.
123 * @return the number of buckets.
124 */
125 public int getBucketCount() {
126 return bucketCount;
127 }
128
129 /**
130 * Set the number of buckets that this histogram will use.
131 * @param count the number of buckets
132 * @return this histogram, useful for method-chaining
133 * @see #DEFAULT_BUCKET_COUNT
134 */
135 public Histogram<T> setBucketCount( int count ) {
136 if (count != this.bucketCount) {
137 this.bucketCount = count;
138 this.bucketWidth = null;
139 this.buckets.clear();
140 }
141 return this;
142 }
143
144 /**
145 * Get the buckets in this histogram. If the histogram has not yet been computed, this method will cause it to be generated.
146 * The resulting list should not be modified.
147 * @return the histogram buckets.
148 */
149 public List<Bucket> getBuckets() {
150 compute();
151 return this.buckets;
152 }
153
154 protected void compute() {
155 // Only compute if there is not already a histogram ...
156 if (this.bucketWidth != null) return;
157
158 // Find the lower and upper bounds of the histogram using the strategy ...
159 T lowerBound = this.bucketingStrategy.getLowerBound();
160 T upperBound = this.bucketingStrategy.getUpperBound();
161
162 // Find the actual minimum and maximum values ...
163 T actualMinimum = this.actualValueStrategy.getLowerBound();
164 T actualMaximum = this.actualValueStrategy.getUpperBound();
165
166 // Create the buckets ...
167 List<T> boundaries = getBucketBoundaries(this.math, lowerBound, upperBound, actualMinimum, actualMaximum, this.bucketCount, this.significantFigures);
168 this.buckets.clear();
169 int numBuckets = boundaries.isEmpty() ? 0 : boundaries.size() - 1;
170 for (int i = 0; i != numBuckets; ++i) {
171 this.buckets.add(new Bucket(boundaries.get(i), boundaries.get(i + 1)));
172 }
173
174 // Create the histogram by adding values to each range ...
175 Iterator<Bucket> intervalIterator = this.buckets.iterator();
176 Bucket currentInterval = null;
177 for (T value : this.values) {
178 while (currentInterval == null || currentInterval.checkValue(value, !intervalIterator.hasNext()) > 0) {
179 if (!intervalIterator.hasNext()) break;
180 currentInterval = intervalIterator.next();
181 }
182 if (currentInterval != null) currentInterval.addValue(value);
183 }
184 }
185
186 /**
187 * Return the total number of values that have gone into this histogram.
188 * @return the total number of values
189 * @see Bucket#getPercentageOfValues()
190 */
191 public long getTotalNumberOfValues() {
192 return this.values.size();
193 }
194
195 protected float getMaximumPercentage() {
196 float maxPercentage = 0.0f;
197 for (Bucket bucket : this.buckets) {
198 maxPercentage = Math.max(maxPercentage, bucket.getPercentageOfValues());
199 }
200 return maxPercentage;
201 }
202
203 protected long getMaximumCount() {
204 long maxCount = 0l;
205 for (Bucket bucket : this.buckets) {
206 maxCount = Math.max(maxCount, bucket.getNumberOfValues());
207 }
208 return maxCount;
209 }
210
211 /**
212 * Generate a textual (horizontal) bar graph of this histogram.
213 * @param maxBarLength the maximum bar length, or 0 if the bar length is to represent actual counts
214 * @return the strings that make up the histogram
215 */
216 public List<String> getTextGraph( int maxBarLength ) {
217 compute();
218 if (maxBarLength < 1) maxBarLength = (int)this.getMaximumCount();
219 final float barLengthForHundredPercent = this.buckets.isEmpty() ? maxBarLength : 100.0f * maxBarLength / getMaximumPercentage();
220 final String fullLengthBar = StringUtil.createString('*', (int)barLengthForHundredPercent);
221 List<String> result = new LinkedList<String>();
222 // First calculate the labels and the max length ...
223 int maxLowerBoundLength = 0;
224 int maxUpperBoundLength = 0;
225 for (Bucket bucket : this.buckets) {
226 maxLowerBoundLength = Math.max(bucket.getLowerBound().toString().length(), maxLowerBoundLength);
227 maxUpperBoundLength = Math.max(bucket.getUpperBound().toString().length(), maxUpperBoundLength);
228 }
229
230 // Create the header ...
231 int rangeWidth = 1 + maxLowerBoundLength + 3 + maxUpperBoundLength + 1;
232 int barWidth = maxBarLength + 20;
233 result.add(StringUtil.justifyLeft("Ranges", rangeWidth, ' ') + " Distribution");
234 result.add(StringUtil.createString('-', rangeWidth) + ' ' + StringUtil.createString('-', barWidth));
235 for (Bucket bucket : this.buckets) {
236 float percent = bucket.getPercentageOfValues();
237 long number = bucket.getNumberOfValues();
238 StringBuilder sb = new StringBuilder();
239 sb.append("[");
240 sb.append(StringUtil.justifyLeft(bucket.getLowerBound().toString(), maxLowerBoundLength, ' '));
241 sb.append(" - ");
242 sb.append(StringUtil.justifyLeft(bucket.getUpperBound().toString(), maxUpperBoundLength, ' '));
243 sb.append("] ");
244 int barLength = Math.max((int)(barLengthForHundredPercent * percent / 100.0f), 0);
245 if (barLength == 0 && number != 0) barLength = 1; // make sure there is a bar for all non-zero buckets
246 sb.append(fullLengthBar.substring(0, barLength));
247 if (number != 0) {
248 sb.append(" ");
249 sb.append(number);
250 sb.append(" (");
251 sb.append(new DecimalFormat("###.#").format(percent));
252 sb.append("%)");
253 }
254 result.add(sb.toString());
255 }
256 return result;
257 }
258
259 protected static <T> List<T> getBucketBoundaries( MathOperations<T> math, T lowerBound, T upperBound, T actualMinimum, T actualMaximum, int bucketCount, int bucketWidthSigFigs ) {
260 lowerBound = math.compare(lowerBound, actualMinimum) < 0 ? actualMinimum : lowerBound;
261 upperBound = math.compare(actualMaximum, upperBound) < 0 ? actualMaximum : upperBound;
262 if (math.compare(lowerBound, upperBound) == 0) {
263 List<T> boundaries = new ArrayList<T>();
264 boundaries.add(lowerBound);
265 boundaries.add(upperBound);
266 return boundaries;
267 }
268 final boolean extraLowerBucketNeeded = math.compare(lowerBound, actualMinimum) > 0;
269 final boolean extraUpperBucketNeeded = math.compare(actualMaximum, upperBound) > 0;
270 if (extraLowerBucketNeeded) --bucketCount;
271 if (extraUpperBucketNeeded) --bucketCount;
272
273 // Compute the delta between the lower and upper bound ...
274 T totalWidth = math.subtract(upperBound, lowerBound);
275 int totalWidthScale = math.getExponentInScientificNotation(totalWidth);
276
277 // Modify the lower bound by rounding down to the next lower meaningful value,
278 // using the scale of the totalWidth to determine how to round down.
279 T roundedLowerBound = math.roundDown(lowerBound, -totalWidthScale);
280 T roundedUpperBound = math.roundUp(upperBound, -totalWidthScale);
281
282 // Create the ranges ...
283 double finalLowerBound = math.doubleValue(roundedLowerBound);
284 double finalUpperBound = math.doubleValue(roundedUpperBound);
285 double finalBucketCount = bucketCount;
286 double bucketWidth = (finalUpperBound - finalLowerBound) / finalBucketCount;
287
288 // DoubleOperations doubleOps = new DoubleOperations();
289 // bucketWidth = doubleOps.keepSignificantFigures(bucketWidth,bucketWidthSigFigs);
290
291 List<T> boundaries = new ArrayList<T>();
292 if (bucketWidth > 0.0d) {
293 if (extraLowerBucketNeeded) boundaries.add(actualMinimum);
294 double nextBoundary = finalLowerBound;
295 for (int i = 0; i != bucketCount; ++i) {
296 boundaries.add(math.create(nextBoundary));
297 nextBoundary = nextBoundary + bucketWidth;
298 // nextBoundary = doubleOps.roundUp(nextBoundary + bucketWidth, bucketWidthSigFigs );
299 }
300 boundaries.add(roundedUpperBound);
301 if (extraUpperBucketNeeded) boundaries.add(actualMaximum);
302 }
303 return boundaries;
304 }
305
306 /**
307 * Represents a bucket in a histogram.
308 */
309 public class Bucket implements Comparable<Bucket> {
310
311 private final T lowerBound;
312 private final T upperBound;
313 private final T width;
314 private long numValues;
315
316 protected Bucket( T lowerBound, T upperBound ) {
317 this.lowerBound = lowerBound;
318 this.upperBound = upperBound;
319 this.width = Histogram.this.math.subtract(upperBound, lowerBound);
320 }
321
322 /**
323 * Get the lower bound of this bucket.
324 * @return the lower bound
325 */
326 public T getLowerBound() {
327 return lowerBound;
328 }
329
330 /**
331 * Get the upper bound of this bucket.
332 * @return the upper bound
333 */
334 public T getUpperBound() {
335 return upperBound;
336 }
337
338 /**
339 * Get the width of this bucket.
340 * @return the width
341 */
342 public T getWidth() {
343 return this.width;
344 }
345
346 /**
347 * Return the percentage of values in the histogram that appear in this bucket.
348 * @return the percentage of all values in the histogram that appear in this bucket.
349 */
350 public float getPercentageOfValues() {
351 float total = Histogram.this.getTotalNumberOfValues();
352 if (total == 0.0f) return 0.0f;
353 float numValuesFloat = this.numValues;
354 return 100.0f * numValuesFloat / total;
355 }
356
357 /**
358 * Add a value to this bucket
359 * @param value
360 */
361 protected void addValue( T value ) {
362 ++this.numValues;
363 }
364
365 /**
366 * Get the number of values in this bucket.
367 * @return the number of values
368 */
369 public long getNumberOfValues() {
370 return this.numValues;
371 }
372
373 /**
374 * Check whether the value fits in this bucket.
375 * @param value the value to check
376 * @param isLast
377 * @return 0 if the value fits in this bucket, -1 if the value fits in a prior bucket, or 1 if the value fits in a later
378 * bucket
379 */
380 public int checkValue( T value, boolean isLast ) {
381 if (Histogram.this.math.compare(this.lowerBound, value) > 0) return -1;
382 if (isLast) {
383 if (Histogram.this.math.compare(value, this.upperBound) > 0) return 1;
384 } else {
385 if (Histogram.this.math.compare(value, this.upperBound) >= 0) return 1;
386 }
387 return 0;
388 }
389
390 public int compareTo( Bucket that ) {
391 // This is lower if 'that' has a lowerBound that is greater than 'this' lower bound ...
392 if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) < 0) return -1;
393 if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) > 0) return 1;
394 // The lower bounds are the same, so 'this' is lower if 'that' has an upperBound that is greater than 'this' lower
395 // bound ...
396 if (Histogram.this.math.compare(this.upperBound, that.upperBound) < 0) return -1;
397 if (Histogram.this.math.compare(this.upperBound, that.upperBound) > 0) return 1;
398 return 0;
399 }
400
401 protected Class<T> getNumberClass() {
402 return Histogram.this.math.getOperandClass();
403 }
404
405 @Override
406 public boolean equals( Object obj ) {
407 if (obj != null && obj.getClass() == this.getClass()) {
408 Bucket that = (Bucket)obj;
409 if (this.getNumberClass().isAssignableFrom(that.getNumberClass())) {
410 if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) != 0) return false;
411 if (Histogram.this.math.compare(this.upperBound, that.upperBound) != 0) return false;
412 if (Histogram.this.math.compare(this.width, that.width) != 0) return false;
413 return true;
414 }
415 }
416 return false;
417 }
418
419 @Override
420 public String toString() {
421 return "[" + this.lowerBound + "," + this.upperBound + ")";
422 }
423
424 }
425
426 public abstract class BucketingStrategy {
427
428 public List<T> getValues() {
429 return Histogram.this.values;
430 }
431
432 public abstract T getLowerBound();
433
434 public abstract T getUpperBound();
435 }
436
437 public class DefaultBucketingStrategy extends BucketingStrategy {
438
439 @Override
440 public T getLowerBound() {
441 if (getValues().isEmpty()) return Histogram.this.math.createZeroValue();
442 return getValues().get(0);
443 }
444
445 @Override
446 public T getUpperBound() {
447 if (getValues().isEmpty()) return Histogram.this.math.createZeroValue();
448 return getValues().get(getValues().size() - 1);
449 }
450 }
451
452 public class ExplicitBucketingStrategy extends BucketingStrategy {
453
454 private final T lowerBound;
455 private final T upperBound;
456
457 protected ExplicitBucketingStrategy( T lowerBound, T upperBound ) {
458 this.lowerBound = lowerBound;
459 this.upperBound = upperBound;
460 }
461
462 @Override
463 public T getLowerBound() {
464 return this.lowerBound;
465 }
466
467 @Override
468 public T getUpperBound() {
469 return this.upperBound;
470 }
471 }
472
473 public class StandardDeviationBucketingStrategy extends BucketingStrategy {
474
475 private final double median;
476 private final double standardDeviation;
477 private final int numberOfDeviationsAboveAndBelow;
478
479 protected StandardDeviationBucketingStrategy( double median, double standardDeviation, int numDeviationsAboveAndBelow ) {
480 this.median = median;
481 this.standardDeviation = Math.abs(standardDeviation);
482 this.numberOfDeviationsAboveAndBelow = Math.abs(numDeviationsAboveAndBelow);
483 }
484
485 @Override
486 public T getLowerBound() {
487 double lower = this.median - (standardDeviation * numberOfDeviationsAboveAndBelow);
488 return Histogram.this.math.create(lower);
489 }
490
491 @Override
492 public T getUpperBound() {
493 double upper = this.median + (standardDeviation * numberOfDeviationsAboveAndBelow);
494 return Histogram.this.math.create(upper);
495 }
496 }
497
498 }