001 /*
002 * JBoss DNA (http://www.jboss.org/dna)
003 * See the COPYRIGHT.txt file distributed with this work for information
004 * regarding copyright ownership. Some portions may be licensed
005 * to Red Hat, Inc. under one or more contributor license agreements.
006 * See the AUTHORS.txt file in the distribution for a full listing of
007 * individual contributors.
008 *
009 * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
010 * is licensed to you under the terms of the GNU Lesser General Public License as
011 * published by the Free Software Foundation; either version 2.1 of
012 * the License, or (at your option) any later version.
013 *
014 * JBoss DNA is distributed in the hope that it will be useful,
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
017 * Lesser General Public License for more details.
018 *
019 * You should have received a copy of the GNU Lesser General Public
020 * License along with this software; if not, write to the Free
021 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
022 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
023 */
024 package org.jboss.dna.common.statistic;
025
026 import java.math.BigDecimal;
027 import java.text.DecimalFormat;
028 import java.util.ArrayList;
029 import java.util.Arrays;
030 import java.util.Collections;
031 import java.util.Iterator;
032 import java.util.LinkedList;
033 import java.util.List;
034 import org.jboss.dna.common.math.MathOperations;
035 import org.jboss.dna.common.util.StringUtil;
036
037 public class Histogram<T extends Number> {
038
039 public static final int DEFAULT_BUCKET_COUNT = 10;
040 public static final int DEFAULT_SIGNIFICANT_FIGURES = 4;
041
042 protected final MathOperations<T> math;
043 protected final List<T> values;
044 private int bucketCount = DEFAULT_BUCKET_COUNT;
045 private int significantFigures = DEFAULT_SIGNIFICANT_FIGURES;
046 private BigDecimal bucketWidth;
047 private LinkedList<Bucket> buckets;
048 private BucketingStrategy actualValueStrategy = new DefaultBucketingStrategy();
049 private BucketingStrategy bucketingStrategy = actualValueStrategy;
050
051 public Histogram( MathOperations<T> operations, List<T> values ) {
052 this.math = operations;
053 this.values = new LinkedList<T>(values);
054 this.buckets = new LinkedList<Bucket>();
055 this.bucketWidth = null;
056 // Sort the data using natural order ...
057 Collections.sort(this.values, this.math.getComparator());
058 }
059
060 public Histogram( MathOperations<T> operations, T... values ) {
061 this(operations, Arrays.asList(values));
062 }
063
064 public BucketingStrategy getStrategy() {
065 return this.bucketingStrategy;
066 }
067
068 /**
069 * @return math
070 */
071 public MathOperations<T> getMathOperations() {
072 return this.math;
073 }
074
075 /**
076 * Set the histogram to use the standard deviation to determine the bucket sizes.
077 * @param median
078 * @param standardDeviation
079 * @param sigma
080 */
081 public void setStrategy( double median, double standardDeviation, int sigma ) {
082 this.bucketingStrategy = new StandardDeviationBucketingStrategy(median, standardDeviation, sigma);
083 this.bucketWidth = null;
084 }
085
086 /**
087 * Set the histogram to use the supplied minimum and maximum values to determine the bucket size.
088 * @param minimum
089 * @param maximum
090 */
091 public void setStrategy( T minimum, T maximum ) {
092 this.bucketingStrategy = new ExplicitBucketingStrategy(minimum, maximum);
093 this.bucketWidth = null;
094 }
095
096 /**
097 * Set the histogram to use the actual minimum and maximum values to determine the bucket sizes.
098 */
099 public void setStrategyToDefault() {
100 this.bucketingStrategy = this.actualValueStrategy;
101 this.bucketWidth = null;
102 }
103
104 public int getSignificantFigures() {
105 return significantFigures;
106 }
107
108 /**
109 * Set the number of significant figures used in the calculation of the bucket widths.
110 * @param significantFigures the number of significant figures for the bucket widths
111 * @return this histogram, useful for method-chaining
112 * @see #DEFAULT_SIGNIFICANT_FIGURES
113 */
114 public Histogram<T> setSignificantFigures( int significantFigures ) {
115 if (significantFigures != this.significantFigures) {
116 this.significantFigures = significantFigures;
117 this.bucketWidth = null;
118 this.buckets.clear();
119 }
120 return this;
121 }
122
123 /**
124 * Return the number of buckets in this histogram.
125 * @return the number of buckets.
126 */
127 public int getBucketCount() {
128 return bucketCount;
129 }
130
131 /**
132 * Set the number of buckets that this histogram will use.
133 * @param count the number of buckets
134 * @return this histogram, useful for method-chaining
135 * @see #DEFAULT_BUCKET_COUNT
136 */
137 public Histogram<T> setBucketCount( int count ) {
138 if (count != this.bucketCount) {
139 this.bucketCount = count;
140 this.bucketWidth = null;
141 this.buckets.clear();
142 }
143 return this;
144 }
145
146 /**
147 * Get the buckets in this histogram. If the histogram has not yet been computed, this method will cause it to be generated.
148 * The resulting list should not be modified.
149 * @return the histogram buckets.
150 */
151 public List<Bucket> getBuckets() {
152 compute();
153 return this.buckets;
154 }
155
156 protected void compute() {
157 // Only compute if there is not already a histogram ...
158 if (this.bucketWidth != null) return;
159
160 // Find the lower and upper bounds of the histogram using the strategy ...
161 T lowerBound = this.bucketingStrategy.getLowerBound();
162 T upperBound = this.bucketingStrategy.getUpperBound();
163
164 // Find the actual minimum and maximum values ...
165 T actualMinimum = this.actualValueStrategy.getLowerBound();
166 T actualMaximum = this.actualValueStrategy.getUpperBound();
167
168 // Create the buckets ...
169 List<T> boundaries = getBucketBoundaries(this.math, lowerBound, upperBound, actualMinimum, actualMaximum, this.bucketCount, this.significantFigures);
170 this.buckets.clear();
171 int numBuckets = boundaries.isEmpty() ? 0 : boundaries.size() - 1;
172 for (int i = 0; i != numBuckets; ++i) {
173 this.buckets.add(new Bucket(boundaries.get(i), boundaries.get(i + 1)));
174 }
175
176 // Create the histogram by adding values to each range ...
177 Iterator<Bucket> intervalIterator = this.buckets.iterator();
178 Bucket currentInterval = null;
179 for (T value : this.values) {
180 while (currentInterval == null || currentInterval.checkValue(value, !intervalIterator.hasNext()) > 0) {
181 if (!intervalIterator.hasNext()) break;
182 currentInterval = intervalIterator.next();
183 }
184 if (currentInterval != null) currentInterval.addValue(value);
185 }
186 }
187
188 /**
189 * Return the total number of values that have gone into this histogram.
190 * @return the total number of values
191 * @see Bucket#getPercentageOfValues()
192 */
193 public long getTotalNumberOfValues() {
194 return this.values.size();
195 }
196
197 protected float getMaximumPercentage() {
198 float maxPercentage = 0.0f;
199 for (Bucket bucket : this.buckets) {
200 maxPercentage = Math.max(maxPercentage, bucket.getPercentageOfValues());
201 }
202 return maxPercentage;
203 }
204
205 protected long getMaximumCount() {
206 long maxCount = 0l;
207 for (Bucket bucket : this.buckets) {
208 maxCount = Math.max(maxCount, bucket.getNumberOfValues());
209 }
210 return maxCount;
211 }
212
213 /**
214 * Generate a textual (horizontal) bar graph of this histogram.
215 * @param maxBarLength the maximum bar length, or 0 if the bar length is to represent actual counts
216 * @return the strings that make up the histogram
217 */
218 public List<String> getTextGraph( int maxBarLength ) {
219 compute();
220 if (maxBarLength < 1) maxBarLength = (int)this.getMaximumCount();
221 final float barLengthForHundredPercent = this.buckets.isEmpty() ? maxBarLength : 100.0f * maxBarLength / getMaximumPercentage();
222 final String fullLengthBar = StringUtil.createString('*', (int)barLengthForHundredPercent);
223 List<String> result = new LinkedList<String>();
224 // First calculate the labels and the max length ...
225 int maxLowerBoundLength = 0;
226 int maxUpperBoundLength = 0;
227 for (Bucket bucket : this.buckets) {
228 maxLowerBoundLength = Math.max(bucket.getLowerBound().toString().length(), maxLowerBoundLength);
229 maxUpperBoundLength = Math.max(bucket.getUpperBound().toString().length(), maxUpperBoundLength);
230 }
231
232 // Create the header ...
233 int rangeWidth = 1 + maxLowerBoundLength + 3 + maxUpperBoundLength + 1;
234 int barWidth = maxBarLength + 20;
235 result.add(StringUtil.justifyLeft("Ranges", rangeWidth, ' ') + " Distribution");
236 result.add(StringUtil.createString('-', rangeWidth) + ' ' + StringUtil.createString('-', barWidth));
237 for (Bucket bucket : this.buckets) {
238 float percent = bucket.getPercentageOfValues();
239 long number = bucket.getNumberOfValues();
240 StringBuilder sb = new StringBuilder();
241 sb.append("[");
242 sb.append(StringUtil.justifyLeft(bucket.getLowerBound().toString(), maxLowerBoundLength, ' '));
243 sb.append(" - ");
244 sb.append(StringUtil.justifyLeft(bucket.getUpperBound().toString(), maxUpperBoundLength, ' '));
245 sb.append("] ");
246 int barLength = Math.max((int)(barLengthForHundredPercent * percent / 100.0f), 0);
247 if (barLength == 0 && number != 0) barLength = 1; // make sure there is a bar for all non-zero buckets
248 sb.append(fullLengthBar.substring(0, barLength));
249 if (number != 0) {
250 sb.append(" ");
251 sb.append(number);
252 sb.append(" (");
253 sb.append(new DecimalFormat("###.#").format(percent));
254 sb.append("%)");
255 }
256 result.add(sb.toString());
257 }
258 return result;
259 }
260
261 protected static <T> List<T> getBucketBoundaries( MathOperations<T> math, T lowerBound, T upperBound, T actualMinimum, T actualMaximum, int bucketCount, int bucketWidthSigFigs ) {
262 lowerBound = math.compare(lowerBound, actualMinimum) < 0 ? actualMinimum : lowerBound;
263 upperBound = math.compare(actualMaximum, upperBound) < 0 ? actualMaximum : upperBound;
264 if (math.compare(lowerBound, upperBound) == 0) {
265 List<T> boundaries = new ArrayList<T>();
266 boundaries.add(lowerBound);
267 boundaries.add(upperBound);
268 return boundaries;
269 }
270 final boolean extraLowerBucketNeeded = math.compare(lowerBound, actualMinimum) > 0;
271 final boolean extraUpperBucketNeeded = math.compare(actualMaximum, upperBound) > 0;
272 if (extraLowerBucketNeeded) --bucketCount;
273 if (extraUpperBucketNeeded) --bucketCount;
274
275 // Compute the delta between the lower and upper bound ...
276 T totalWidth = math.subtract(upperBound, lowerBound);
277 int totalWidthScale = math.getExponentInScientificNotation(totalWidth);
278
279 // Modify the lower bound by rounding down to the next lower meaningful value,
280 // using the scale of the totalWidth to determine how to round down.
281 T roundedLowerBound = math.roundDown(lowerBound, -totalWidthScale);
282 T roundedUpperBound = math.roundUp(upperBound, -totalWidthScale);
283
284 // Create the ranges ...
285 double finalLowerBound = math.doubleValue(roundedLowerBound);
286 double finalUpperBound = math.doubleValue(roundedUpperBound);
287 double finalBucketCount = bucketCount;
288 double bucketWidth = (finalUpperBound - finalLowerBound) / finalBucketCount;
289
290 // DoubleOperations doubleOps = new DoubleOperations();
291 // bucketWidth = doubleOps.keepSignificantFigures(bucketWidth,bucketWidthSigFigs);
292
293 List<T> boundaries = new ArrayList<T>();
294 if (bucketWidth > 0.0d) {
295 if (extraLowerBucketNeeded) boundaries.add(actualMinimum);
296 double nextBoundary = finalLowerBound;
297 for (int i = 0; i != bucketCount; ++i) {
298 boundaries.add(math.create(nextBoundary));
299 nextBoundary = nextBoundary + bucketWidth;
300 // nextBoundary = doubleOps.roundUp(nextBoundary + bucketWidth, bucketWidthSigFigs );
301 }
302 boundaries.add(roundedUpperBound);
303 if (extraUpperBucketNeeded) boundaries.add(actualMaximum);
304 }
305 return boundaries;
306 }
307
308 /**
309 * Represents a bucket in a histogram.
310 */
311 public class Bucket implements Comparable<Bucket> {
312
313 private final T lowerBound;
314 private final T upperBound;
315 private final T width;
316 private long numValues;
317
318 protected Bucket( T lowerBound, T upperBound ) {
319 this.lowerBound = lowerBound;
320 this.upperBound = upperBound;
321 this.width = Histogram.this.math.subtract(upperBound, lowerBound);
322 }
323
324 /**
325 * Get the lower bound of this bucket.
326 * @return the lower bound
327 */
328 public T getLowerBound() {
329 return lowerBound;
330 }
331
332 /**
333 * Get the upper bound of this bucket.
334 * @return the upper bound
335 */
336 public T getUpperBound() {
337 return upperBound;
338 }
339
340 /**
341 * Get the width of this bucket.
342 * @return the width
343 */
344 public T getWidth() {
345 return this.width;
346 }
347
348 /**
349 * Return the percentage of values in the histogram that appear in this bucket.
350 * @return the percentage of all values in the histogram that appear in this bucket.
351 */
352 public float getPercentageOfValues() {
353 float total = Histogram.this.getTotalNumberOfValues();
354 if (total == 0.0f) return 0.0f;
355 float numValuesFloat = this.numValues;
356 return 100.0f * numValuesFloat / total;
357 }
358
359 /**
360 * Add a value to this bucket
361 * @param value
362 */
363 protected void addValue( T value ) {
364 ++this.numValues;
365 }
366
367 /**
368 * Get the number of values in this bucket.
369 * @return the number of values
370 */
371 public long getNumberOfValues() {
372 return this.numValues;
373 }
374
375 /**
376 * Check whether the value fits in this bucket.
377 * @param value the value to check
378 * @param isLast
379 * @return 0 if the value fits in this bucket, -1 if the value fits in a prior bucket, or 1 if the value fits in a later
380 * bucket
381 */
382 public int checkValue( T value, boolean isLast ) {
383 if (Histogram.this.math.compare(this.lowerBound, value) > 0) return -1;
384 if (isLast) {
385 if (Histogram.this.math.compare(value, this.upperBound) > 0) return 1;
386 } else {
387 if (Histogram.this.math.compare(value, this.upperBound) >= 0) return 1;
388 }
389 return 0;
390 }
391
392 public int compareTo( Bucket that ) {
393 // This is lower if 'that' has a lowerBound that is greater than 'this' lower bound ...
394 if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) < 0) return -1;
395 if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) > 0) return 1;
396 // The lower bounds are the same, so 'this' is lower if 'that' has an upperBound that is greater than 'this' lower
397 // bound ...
398 if (Histogram.this.math.compare(this.upperBound, that.upperBound) < 0) return -1;
399 if (Histogram.this.math.compare(this.upperBound, that.upperBound) > 0) return 1;
400 return 0;
401 }
402
403 protected Class<T> getNumberClass() {
404 return Histogram.this.math.getOperandClass();
405 }
406
407 @Override
408 public boolean equals( Object obj ) {
409 if (obj != null && obj.getClass() == this.getClass()) {
410 Bucket that = (Bucket)obj;
411 if (this.getNumberClass().isAssignableFrom(that.getNumberClass())) {
412 if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) != 0) return false;
413 if (Histogram.this.math.compare(this.upperBound, that.upperBound) != 0) return false;
414 if (Histogram.this.math.compare(this.width, that.width) != 0) return false;
415 return true;
416 }
417 }
418 return false;
419 }
420
421 @Override
422 public String toString() {
423 return "[" + this.lowerBound + "," + this.upperBound + ")";
424 }
425
426 }
427
428 public abstract class BucketingStrategy {
429
430 public List<T> getValues() {
431 return Histogram.this.values;
432 }
433
434 public abstract T getLowerBound();
435
436 public abstract T getUpperBound();
437 }
438
439 public class DefaultBucketingStrategy extends BucketingStrategy {
440
441 @Override
442 public T getLowerBound() {
443 if (getValues().isEmpty()) return Histogram.this.math.createZeroValue();
444 return getValues().get(0);
445 }
446
447 @Override
448 public T getUpperBound() {
449 if (getValues().isEmpty()) return Histogram.this.math.createZeroValue();
450 return getValues().get(getValues().size() - 1);
451 }
452 }
453
454 public class ExplicitBucketingStrategy extends BucketingStrategy {
455
456 private final T lowerBound;
457 private final T upperBound;
458
459 protected ExplicitBucketingStrategy( T lowerBound, T upperBound ) {
460 this.lowerBound = lowerBound;
461 this.upperBound = upperBound;
462 }
463
464 @Override
465 public T getLowerBound() {
466 return this.lowerBound;
467 }
468
469 @Override
470 public T getUpperBound() {
471 return this.upperBound;
472 }
473 }
474
475 public class StandardDeviationBucketingStrategy extends BucketingStrategy {
476
477 private final double median;
478 private final double standardDeviation;
479 private final int numberOfDeviationsAboveAndBelow;
480
481 protected StandardDeviationBucketingStrategy( double median, double standardDeviation, int numDeviationsAboveAndBelow ) {
482 this.median = median;
483 this.standardDeviation = Math.abs(standardDeviation);
484 this.numberOfDeviationsAboveAndBelow = Math.abs(numDeviationsAboveAndBelow);
485 }
486
487 @Override
488 public T getLowerBound() {
489 double lower = this.median - (standardDeviation * numberOfDeviationsAboveAndBelow);
490 return Histogram.this.math.create(lower);
491 }
492
493 @Override
494 public T getUpperBound() {
495 double upper = this.median + (standardDeviation * numberOfDeviationsAboveAndBelow);
496 return Histogram.this.math.create(upper);
497 }
498 }
499
500 }