View Javadoc

1   /*
2    * ModeShape (http://www.modeshape.org)
3    * See the COPYRIGHT.txt file distributed with this work for information
4    * regarding copyright ownership.  Some portions may be licensed
5    * to Red Hat, Inc. under one or more contributor license agreements.
6    * See the AUTHORS.txt file in the distribution for a full listing of 
7    * individual contributors. 
8    *
9    * ModeShape is free software. Unless otherwise indicated, all code in ModeShape
10   * is licensed to you under the terms of the GNU Lesser General Public License as
11   * published by the Free Software Foundation; either version 2.1 of
12   * the License, or (at your option) any later version.
13   *
14   * ModeShape is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17   * Lesser General Public License for more details.
18   *
19   * You should have received a copy of the GNU Lesser General Public
20   * License along with this software; if not, write to the Free
21   * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
22   * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
23   */
24  package org.modeshape.common.statistic;
25  
26  import java.math.BigDecimal;
27  import java.text.DecimalFormat;
28  import java.util.ArrayList;
29  import java.util.Arrays;
30  import java.util.Collections;
31  import java.util.Iterator;
32  import java.util.LinkedList;
33  import java.util.List;
34  import net.jcip.annotations.NotThreadSafe;
35  import org.modeshape.common.math.MathOperations;
36  import org.modeshape.common.util.StringUtil;
37  
38  /**
39   * A representation of a histogram of values.
40   * 
41   * @param <T> the type of value
42   */
43  @NotThreadSafe
44  public class Histogram<T extends Number> {
45  
46      public static final int DEFAULT_BUCKET_COUNT = 10;
47      public static final int DEFAULT_SIGNIFICANT_FIGURES = 4;
48  
49      protected final MathOperations<T> math;
50      protected final List<T> values;
51      private int bucketCount = DEFAULT_BUCKET_COUNT;
52      private int significantFigures = DEFAULT_SIGNIFICANT_FIGURES;
53      private BigDecimal bucketWidth;
54      private LinkedList<Bucket> buckets;
55      private BucketingStrategy actualValueStrategy = new DefaultBucketingStrategy();
56      private BucketingStrategy bucketingStrategy = actualValueStrategy;
57  
58      public Histogram( MathOperations<T> operations,
59                        List<T> values ) {
60          this.math = operations;
61          this.values = new LinkedList<T>(values);
62          this.buckets = new LinkedList<Bucket>();
63          this.bucketWidth = null;
64          // Sort the data using natural order ...
65          Collections.sort(this.values, this.math.getComparator());
66      }
67  
68      public Histogram( MathOperations<T> operations,
69                        T... values ) {
70          this(operations, Arrays.asList(values));
71      }
72  
73      public BucketingStrategy getStrategy() {
74          return this.bucketingStrategy;
75      }
76  
77      /**
78       * @return math
79       */
80      public MathOperations<T> getMathOperations() {
81          return this.math;
82      }
83  
84      /**
85       * Set the histogram to use the standard deviation to determine the bucket sizes.
86       * 
87       * @param median
88       * @param standardDeviation
89       * @param sigma
90       */
91      public void setStrategy( double median,
92                               double standardDeviation,
93                               int sigma ) {
94          this.bucketingStrategy = new StandardDeviationBucketingStrategy(median, standardDeviation, sigma);
95          this.bucketWidth = null;
96      }
97  
98      /**
99       * Set the histogram to use the supplied minimum and maximum values to determine the bucket size.
100      * 
101      * @param minimum
102      * @param maximum
103      */
104     public void setStrategy( T minimum,
105                              T maximum ) {
106         this.bucketingStrategy = new ExplicitBucketingStrategy(minimum, maximum);
107         this.bucketWidth = null;
108     }
109 
110     /**
111      * Set the histogram to use the actual minimum and maximum values to determine the bucket sizes.
112      */
113     public void setStrategyToDefault() {
114         this.bucketingStrategy = this.actualValueStrategy;
115         this.bucketWidth = null;
116     }
117 
118     public int getSignificantFigures() {
119         return significantFigures;
120     }
121 
122     /**
123      * Set the number of significant figures used in the calculation of the bucket widths.
124      * 
125      * @param significantFigures the number of significant figures for the bucket widths
126      * @return this histogram, useful for method-chaining
127      * @see #DEFAULT_SIGNIFICANT_FIGURES
128      */
129     public Histogram<T> setSignificantFigures( int significantFigures ) {
130         if (significantFigures != this.significantFigures) {
131             this.significantFigures = significantFigures;
132             this.bucketWidth = null;
133             this.buckets.clear();
134         }
135         return this;
136     }
137 
138     /**
139      * Return the number of buckets in this histogram.
140      * 
141      * @return the number of buckets.
142      */
143     public int getBucketCount() {
144         return bucketCount;
145     }
146 
147     /**
148      * Set the number of buckets that this histogram will use.
149      * 
150      * @param count the number of buckets
151      * @return this histogram, useful for method-chaining
152      * @see #DEFAULT_BUCKET_COUNT
153      */
154     public Histogram<T> setBucketCount( int count ) {
155         if (count != this.bucketCount) {
156             this.bucketCount = count;
157             this.bucketWidth = null;
158             this.buckets.clear();
159         }
160         return this;
161     }
162 
163     /**
164      * Get the buckets in this histogram. If the histogram has not yet been computed, this method will cause it to be generated.
165      * The resulting list should not be modified.
166      * 
167      * @return the histogram buckets.
168      */
169     public List<Bucket> getBuckets() {
170         compute();
171         return this.buckets;
172     }
173 
174     protected void compute() {
175         // Only compute if there is not already a histogram ...
176         if (this.bucketWidth != null) return;
177 
178         // Find the lower and upper bounds of the histogram using the strategy ...
179         T lowerBound = this.bucketingStrategy.getLowerBound();
180         T upperBound = this.bucketingStrategy.getUpperBound();
181 
182         // Find the actual minimum and maximum values ...
183         T actualMinimum = this.actualValueStrategy.getLowerBound();
184         T actualMaximum = this.actualValueStrategy.getUpperBound();
185 
186         // Create the buckets ...
187         List<T> boundaries = getBucketBoundaries(this.math,
188                                                  lowerBound,
189                                                  upperBound,
190                                                  actualMinimum,
191                                                  actualMaximum,
192                                                  this.bucketCount,
193                                                  this.significantFigures);
194         this.buckets.clear();
195         int numBuckets = boundaries.isEmpty() ? 0 : boundaries.size() - 1;
196         for (int i = 0; i != numBuckets; ++i) {
197             this.buckets.add(new Bucket(boundaries.get(i), boundaries.get(i + 1)));
198         }
199 
200         // Create the histogram by adding values to each range ...
201         Iterator<Bucket> intervalIterator = this.buckets.iterator();
202         Bucket currentInterval = null;
203         for (T value : this.values) {
204             while (currentInterval == null || currentInterval.checkValue(value, !intervalIterator.hasNext()) > 0) {
205                 if (!intervalIterator.hasNext()) break;
206                 currentInterval = intervalIterator.next();
207             }
208             if (currentInterval != null) currentInterval.addValue(value);
209         }
210     }
211 
212     /**
213      * Return the total number of values that have gone into this histogram.
214      * 
215      * @return the total number of values
216      * @see Bucket#getPercentageOfValues()
217      */
218     public long getTotalNumberOfValues() {
219         return this.values.size();
220     }
221 
222     protected float getMaximumPercentage() {
223         float maxPercentage = 0.0f;
224         for (Bucket bucket : this.buckets) {
225             maxPercentage = Math.max(maxPercentage, bucket.getPercentageOfValues());
226         }
227         return maxPercentage;
228     }
229 
230     protected long getMaximumCount() {
231         long maxCount = 0l;
232         for (Bucket bucket : this.buckets) {
233             maxCount = Math.max(maxCount, bucket.getNumberOfValues());
234         }
235         return maxCount;
236     }
237 
238     /**
239      * Generate a textual (horizontal) bar graph of this histogram.
240      * 
241      * @param maxBarLength the maximum bar length, or 0 if the bar length is to represent actual counts
242      * @return the strings that make up the histogram
243      */
244     public List<String> getTextGraph( int maxBarLength ) {
245         compute();
246         if (maxBarLength < 1) maxBarLength = (int)this.getMaximumCount();
247         final float barLengthForHundredPercent = this.buckets.isEmpty() ? maxBarLength : 100.0f * maxBarLength
248                                                                                          / getMaximumPercentage();
249         final String fullLengthBar = StringUtil.createString('*', (int)barLengthForHundredPercent);
250         List<String> result = new LinkedList<String>();
251         // First calculate the labels and the max length ...
252         int maxLowerBoundLength = 0;
253         int maxUpperBoundLength = 0;
254         for (Bucket bucket : this.buckets) {
255             maxLowerBoundLength = Math.max(bucket.getLowerBound().toString().length(), maxLowerBoundLength);
256             maxUpperBoundLength = Math.max(bucket.getUpperBound().toString().length(), maxUpperBoundLength);
257         }
258 
259         // Create the header ...
260         int rangeWidth = 1 + maxLowerBoundLength + 3 + maxUpperBoundLength + 1;
261         int barWidth = maxBarLength + 20;
262         result.add(StringUtil.justifyLeft("Ranges", rangeWidth, ' ') + " Distribution");
263         result.add(StringUtil.createString('-', rangeWidth) + ' ' + StringUtil.createString('-', barWidth));
264         for (Bucket bucket : this.buckets) {
265             float percent = bucket.getPercentageOfValues();
266             long number = bucket.getNumberOfValues();
267             StringBuilder sb = new StringBuilder();
268             sb.append("[");
269             sb.append(StringUtil.justifyLeft(bucket.getLowerBound().toString(), maxLowerBoundLength, ' '));
270             sb.append(" - ");
271             sb.append(StringUtil.justifyLeft(bucket.getUpperBound().toString(), maxUpperBoundLength, ' '));
272             sb.append("] ");
273             int barLength = Math.max((int)(barLengthForHundredPercent * percent / 100.0f), 0);
274             if (barLength == 0 && number != 0) barLength = 1; // make sure there is a bar for all non-zero buckets
275             sb.append(fullLengthBar.substring(0, barLength));
276             if (number != 0) {
277                 sb.append(" ");
278                 sb.append(number);
279                 sb.append(" (");
280                 sb.append(new DecimalFormat("###.#").format(percent));
281                 sb.append("%)");
282             }
283             result.add(sb.toString());
284         }
285         return result;
286     }
287 
288     protected static <T> List<T> getBucketBoundaries( MathOperations<T> math,
289                                                       T lowerBound,
290                                                       T upperBound,
291                                                       T actualMinimum,
292                                                       T actualMaximum,
293                                                       int bucketCount,
294                                                       int bucketWidthSigFigs ) {
295         lowerBound = math.compare(lowerBound, actualMinimum) < 0 ? actualMinimum : lowerBound;
296         upperBound = math.compare(actualMaximum, upperBound) < 0 ? actualMaximum : upperBound;
297         if (math.compare(lowerBound, upperBound) == 0) {
298             List<T> boundaries = new ArrayList<T>();
299             boundaries.add(lowerBound);
300             boundaries.add(upperBound);
301             return boundaries;
302         }
303         final boolean extraLowerBucketNeeded = math.compare(lowerBound, actualMinimum) > 0;
304         final boolean extraUpperBucketNeeded = math.compare(actualMaximum, upperBound) > 0;
305         if (extraLowerBucketNeeded) --bucketCount;
306         if (extraUpperBucketNeeded) --bucketCount;
307 
308         // Compute the delta between the lower and upper bound ...
309         T totalWidth = math.subtract(upperBound, lowerBound);
310         int totalWidthScale = math.getExponentInScientificNotation(totalWidth);
311 
312         // Modify the lower bound by rounding down to the next lower meaningful value,
313         // using the scale of the totalWidth to determine how to round down.
314         T roundedLowerBound = math.roundDown(lowerBound, -totalWidthScale);
315         T roundedUpperBound = math.roundUp(upperBound, -totalWidthScale);
316 
317         // Create the ranges ...
318         double finalLowerBound = math.doubleValue(roundedLowerBound);
319         double finalUpperBound = math.doubleValue(roundedUpperBound);
320         double finalBucketCount = bucketCount;
321         double bucketWidth = (finalUpperBound - finalLowerBound) / finalBucketCount;
322 
323         // DoubleOperations doubleOps = new DoubleOperations();
324         // bucketWidth = doubleOps.keepSignificantFigures(bucketWidth,bucketWidthSigFigs);
325 
326         List<T> boundaries = new ArrayList<T>();
327         if (bucketWidth > 0.0d) {
328             if (extraLowerBucketNeeded) boundaries.add(actualMinimum);
329             double nextBoundary = finalLowerBound;
330             for (int i = 0; i != bucketCount; ++i) {
331                 boundaries.add(math.create(nextBoundary));
332                 nextBoundary = nextBoundary + bucketWidth;
333                 // nextBoundary = doubleOps.roundUp(nextBoundary + bucketWidth, bucketWidthSigFigs );
334             }
335             boundaries.add(roundedUpperBound);
336             if (extraUpperBucketNeeded) boundaries.add(actualMaximum);
337         }
338         return boundaries;
339     }
340 
341     /**
342      * Represents a bucket in a histogram.
343      */
344     public class Bucket implements Comparable<Bucket> {
345 
346         private final T lowerBound;
347         private final T upperBound;
348         private final T width;
349         private long numValues;
350 
351         protected Bucket( T lowerBound,
352                           T upperBound ) {
353             this.lowerBound = lowerBound;
354             this.upperBound = upperBound;
355             this.width = Histogram.this.math.subtract(upperBound, lowerBound);
356         }
357 
358         /**
359          * Get the lower bound of this bucket.
360          * 
361          * @return the lower bound
362          */
363         public T getLowerBound() {
364             return lowerBound;
365         }
366 
367         /**
368          * Get the upper bound of this bucket.
369          * 
370          * @return the upper bound
371          */
372         public T getUpperBound() {
373             return upperBound;
374         }
375 
376         /**
377          * Get the width of this bucket.
378          * 
379          * @return the width
380          */
381         public T getWidth() {
382             return this.width;
383         }
384 
385         /**
386          * Return the percentage of values in the histogram that appear in this bucket.
387          * 
388          * @return the percentage of all values in the histogram that appear in this bucket.
389          */
390         public float getPercentageOfValues() {
391             float total = Histogram.this.getTotalNumberOfValues();
392             if (total == 0.0f) return 0.0f;
393             float numValuesFloat = this.numValues;
394             return 100.0f * numValuesFloat / total;
395         }
396 
397         /**
398          * Add a value to this bucket
399          * 
400          * @param value
401          */
402         protected void addValue( T value ) {
403             ++this.numValues;
404         }
405 
406         /**
407          * Get the number of values in this bucket.
408          * 
409          * @return the number of values
410          */
411         public long getNumberOfValues() {
412             return this.numValues;
413         }
414 
415         /**
416          * Check whether the value fits in this bucket.
417          * 
418          * @param value the value to check
419          * @param isLast
420          * @return 0 if the value fits in this bucket, -1 if the value fits in a prior bucket, or 1 if the value fits in a later
421          *         bucket
422          */
423         public int checkValue( T value,
424                                boolean isLast ) {
425             if (Histogram.this.math.compare(this.lowerBound, value) > 0) return -1;
426             if (isLast) {
427                 if (Histogram.this.math.compare(value, this.upperBound) > 0) return 1;
428             } else {
429                 if (Histogram.this.math.compare(value, this.upperBound) >= 0) return 1;
430             }
431             return 0;
432         }
433 
434         public int compareTo( Bucket that ) {
435             // This is lower if 'that' has a lowerBound that is greater than 'this' lower bound ...
436             if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) < 0) return -1;
437             if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) > 0) return 1;
438             // The lower bounds are the same, so 'this' is lower if 'that' has an upperBound that is greater than 'this' lower
439             // bound ...
440             if (Histogram.this.math.compare(this.upperBound, that.upperBound) < 0) return -1;
441             if (Histogram.this.math.compare(this.upperBound, that.upperBound) > 0) return 1;
442             return 0;
443         }
444 
445         protected Class<T> getNumberClass() {
446             return Histogram.this.math.getOperandClass();
447         }
448 
449         @Override
450         public boolean equals( Object obj ) {
451             if (obj != null && obj.getClass() == this.getClass()) {
452                 Bucket that = (Bucket)obj;
453                 if (this.getNumberClass().isAssignableFrom(that.getNumberClass())) {
454                     if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) != 0) return false;
455                     if (Histogram.this.math.compare(this.upperBound, that.upperBound) != 0) return false;
456                     if (Histogram.this.math.compare(this.width, that.width) != 0) return false;
457                     return true;
458                 }
459             }
460             return false;
461         }
462 
463         @Override
464         public String toString() {
465             return "[" + this.lowerBound + "," + this.upperBound + ")";
466         }
467 
468     }
469 
470     public abstract class BucketingStrategy {
471 
472         public List<T> getValues() {
473             return Histogram.this.values;
474         }
475 
476         public abstract T getLowerBound();
477 
478         public abstract T getUpperBound();
479     }
480 
481     public class DefaultBucketingStrategy extends BucketingStrategy {
482 
483         @Override
484         public T getLowerBound() {
485             if (getValues().isEmpty()) return Histogram.this.math.createZeroValue();
486             return getValues().get(0);
487         }
488 
489         @Override
490         public T getUpperBound() {
491             if (getValues().isEmpty()) return Histogram.this.math.createZeroValue();
492             return getValues().get(getValues().size() - 1);
493         }
494     }
495 
496     public class ExplicitBucketingStrategy extends BucketingStrategy {
497 
498         private final T lowerBound;
499         private final T upperBound;
500 
501         protected ExplicitBucketingStrategy( T lowerBound,
502                                              T upperBound ) {
503             this.lowerBound = lowerBound;
504             this.upperBound = upperBound;
505         }
506 
507         @Override
508         public T getLowerBound() {
509             return this.lowerBound;
510         }
511 
512         @Override
513         public T getUpperBound() {
514             return this.upperBound;
515         }
516     }
517 
518     public class StandardDeviationBucketingStrategy extends BucketingStrategy {
519 
520         private final double median;
521         private final double standardDeviation;
522         private final int numberOfDeviationsAboveAndBelow;
523 
524         protected StandardDeviationBucketingStrategy( double median,
525                                                       double standardDeviation,
526                                                       int numDeviationsAboveAndBelow ) {
527             this.median = median;
528             this.standardDeviation = Math.abs(standardDeviation);
529             this.numberOfDeviationsAboveAndBelow = Math.abs(numDeviationsAboveAndBelow);
530         }
531 
532         @Override
533         public T getLowerBound() {
534             double lower = this.median - (standardDeviation * numberOfDeviationsAboveAndBelow);
535             return Histogram.this.math.create(lower);
536         }
537 
538         @Override
539         public T getUpperBound() {
540             double upper = this.median + (standardDeviation * numberOfDeviationsAboveAndBelow);
541             return Histogram.this.math.create(upper);
542         }
543     }
544 
545 }