001    /*
002     * JBoss DNA (http://www.jboss.org/dna)
003     * See the COPYRIGHT.txt file distributed with this work for information
004     * regarding copyright ownership.  Some portions may be licensed
005     * to Red Hat, Inc. under one or more contributor license agreements.
006     * See the AUTHORS.txt file in the distribution for a full listing of 
007     * individual contributors. 
008     *
009     * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
010     * is licensed to you under the terms of the GNU Lesser General Public License as
011     * published by the Free Software Foundation; either version 2.1 of
012     * the License, or (at your option) any later version.
013     *
014     * JBoss DNA is distributed in the hope that it will be useful,
015     * but WITHOUT ANY WARRANTY; without even the implied warranty of
016     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
017     * Lesser General Public License for more details.
018     *
019     * You should have received a copy of the GNU Lesser General Public
020     * License along with this software; if not, write to the Free
021     * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
022     * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
023     */
024    package org.jboss.dna.common.statistic;
025    
026    import java.math.BigDecimal;
027    import java.text.DecimalFormat;
028    import java.util.ArrayList;
029    import java.util.Arrays;
030    import java.util.Collections;
031    import java.util.Iterator;
032    import java.util.LinkedList;
033    import java.util.List;
034    import org.jboss.dna.common.math.MathOperations;
035    import org.jboss.dna.common.util.StringUtil;
036    
037    public class Histogram<T extends Number> {
038    
039        public static final int DEFAULT_BUCKET_COUNT = 10;
040        public static final int DEFAULT_SIGNIFICANT_FIGURES = 4;
041    
042        protected final MathOperations<T> math;
043        protected final List<T> values;
044        private int bucketCount = DEFAULT_BUCKET_COUNT;
045        private int significantFigures = DEFAULT_SIGNIFICANT_FIGURES;
046        private BigDecimal bucketWidth;
047        private LinkedList<Bucket> buckets;
048        private BucketingStrategy actualValueStrategy = new DefaultBucketingStrategy();
049        private BucketingStrategy bucketingStrategy = actualValueStrategy;
050    
051        public Histogram( MathOperations<T> operations, List<T> values ) {
052            this.math = operations;
053            this.values = new LinkedList<T>(values);
054            this.buckets = new LinkedList<Bucket>();
055            this.bucketWidth = null;
056            // Sort the data using natural order ...
057            Collections.sort(this.values, this.math.getComparator());
058        }
059    
060        public Histogram( MathOperations<T> operations, T... values ) {
061            this(operations, Arrays.asList(values));
062        }
063    
064        public BucketingStrategy getStrategy() {
065            return this.bucketingStrategy;
066        }
067    
068        /**
069         * @return math
070         */
071        public MathOperations<T> getMathOperations() {
072            return this.math;
073        }
074    
075        /**
076         * Set the histogram to use the standard deviation to determine the bucket sizes.
077         * @param median
078         * @param standardDeviation
079         * @param sigma
080         */
081        public void setStrategy( double median, double standardDeviation, int sigma ) {
082            this.bucketingStrategy = new StandardDeviationBucketingStrategy(median, standardDeviation, sigma);
083            this.bucketWidth = null;
084        }
085    
086        /**
087         * Set the histogram to use the supplied minimum and maximum values to determine the bucket size.
088         * @param minimum
089         * @param maximum
090         */
091        public void setStrategy( T minimum, T maximum ) {
092            this.bucketingStrategy = new ExplicitBucketingStrategy(minimum, maximum);
093            this.bucketWidth = null;
094        }
095    
096        /**
097         * Set the histogram to use the actual minimum and maximum values to determine the bucket sizes.
098         */
099        public void setStrategyToDefault() {
100            this.bucketingStrategy = this.actualValueStrategy;
101            this.bucketWidth = null;
102        }
103    
104        public int getSignificantFigures() {
105            return significantFigures;
106        }
107    
108        /**
109         * Set the number of significant figures used in the calculation of the bucket widths.
110         * @param significantFigures the number of significant figures for the bucket widths
111         * @return this histogram, useful for method-chaining
112         * @see #DEFAULT_SIGNIFICANT_FIGURES
113         */
114        public Histogram<T> setSignificantFigures( int significantFigures ) {
115            if (significantFigures != this.significantFigures) {
116                this.significantFigures = significantFigures;
117                this.bucketWidth = null;
118                this.buckets.clear();
119            }
120            return this;
121        }
122    
123        /**
124         * Return the number of buckets in this histogram.
125         * @return the number of buckets.
126         */
127        public int getBucketCount() {
128            return bucketCount;
129        }
130    
131        /**
132         * Set the number of buckets that this histogram will use.
133         * @param count the number of buckets
134         * @return this histogram, useful for method-chaining
135         * @see #DEFAULT_BUCKET_COUNT
136         */
137        public Histogram<T> setBucketCount( int count ) {
138            if (count != this.bucketCount) {
139                this.bucketCount = count;
140                this.bucketWidth = null;
141                this.buckets.clear();
142            }
143            return this;
144        }
145    
146        /**
147         * Get the buckets in this histogram. If the histogram has not yet been computed, this method will cause it to be generated.
148         * The resulting list should not be modified.
149         * @return the histogram buckets.
150         */
151        public List<Bucket> getBuckets() {
152            compute();
153            return this.buckets;
154        }
155    
156        protected void compute() {
157            // Only compute if there is not already a histogram ...
158            if (this.bucketWidth != null) return;
159    
160            // Find the lower and upper bounds of the histogram using the strategy ...
161            T lowerBound = this.bucketingStrategy.getLowerBound();
162            T upperBound = this.bucketingStrategy.getUpperBound();
163    
164            // Find the actual minimum and maximum values ...
165            T actualMinimum = this.actualValueStrategy.getLowerBound();
166            T actualMaximum = this.actualValueStrategy.getUpperBound();
167    
168            // Create the buckets ...
169            List<T> boundaries = getBucketBoundaries(this.math, lowerBound, upperBound, actualMinimum, actualMaximum, this.bucketCount, this.significantFigures);
170            this.buckets.clear();
171            int numBuckets = boundaries.isEmpty() ? 0 : boundaries.size() - 1;
172            for (int i = 0; i != numBuckets; ++i) {
173                this.buckets.add(new Bucket(boundaries.get(i), boundaries.get(i + 1)));
174            }
175    
176            // Create the histogram by adding values to each range ...
177            Iterator<Bucket> intervalIterator = this.buckets.iterator();
178            Bucket currentInterval = null;
179            for (T value : this.values) {
180                while (currentInterval == null || currentInterval.checkValue(value, !intervalIterator.hasNext()) > 0) {
181                    if (!intervalIterator.hasNext()) break;
182                    currentInterval = intervalIterator.next();
183                }
184                if (currentInterval != null) currentInterval.addValue(value);
185            }
186        }
187    
188        /**
189         * Return the total number of values that have gone into this histogram.
190         * @return the total number of values
191         * @see Bucket#getPercentageOfValues()
192         */
193        public long getTotalNumberOfValues() {
194            return this.values.size();
195        }
196    
197        protected float getMaximumPercentage() {
198            float maxPercentage = 0.0f;
199            for (Bucket bucket : this.buckets) {
200                maxPercentage = Math.max(maxPercentage, bucket.getPercentageOfValues());
201            }
202            return maxPercentage;
203        }
204    
205        protected long getMaximumCount() {
206            long maxCount = 0l;
207            for (Bucket bucket : this.buckets) {
208                maxCount = Math.max(maxCount, bucket.getNumberOfValues());
209            }
210            return maxCount;
211        }
212    
213        /**
214         * Generate a textual (horizontal) bar graph of this histogram.
215         * @param maxBarLength the maximum bar length, or 0 if the bar length is to represent actual counts
216         * @return the strings that make up the histogram
217         */
218        public List<String> getTextGraph( int maxBarLength ) {
219            compute();
220            if (maxBarLength < 1) maxBarLength = (int)this.getMaximumCount();
221            final float barLengthForHundredPercent = this.buckets.isEmpty() ? maxBarLength : 100.0f * maxBarLength / getMaximumPercentage();
222            final String fullLengthBar = StringUtil.createString('*', (int)barLengthForHundredPercent);
223            List<String> result = new LinkedList<String>();
224            // First calculate the labels and the max length ...
225            int maxLowerBoundLength = 0;
226            int maxUpperBoundLength = 0;
227            for (Bucket bucket : this.buckets) {
228                maxLowerBoundLength = Math.max(bucket.getLowerBound().toString().length(), maxLowerBoundLength);
229                maxUpperBoundLength = Math.max(bucket.getUpperBound().toString().length(), maxUpperBoundLength);
230            }
231    
232            // Create the header ...
233            int rangeWidth = 1 + maxLowerBoundLength + 3 + maxUpperBoundLength + 1;
234            int barWidth = maxBarLength + 20;
235            result.add(StringUtil.justifyLeft("Ranges", rangeWidth, ' ') + " Distribution");
236            result.add(StringUtil.createString('-', rangeWidth) + ' ' + StringUtil.createString('-', barWidth));
237            for (Bucket bucket : this.buckets) {
238                float percent = bucket.getPercentageOfValues();
239                long number = bucket.getNumberOfValues();
240                StringBuilder sb = new StringBuilder();
241                sb.append("[");
242                sb.append(StringUtil.justifyLeft(bucket.getLowerBound().toString(), maxLowerBoundLength, ' '));
243                sb.append(" - ");
244                sb.append(StringUtil.justifyLeft(bucket.getUpperBound().toString(), maxUpperBoundLength, ' '));
245                sb.append("] ");
246                int barLength = Math.max((int)(barLengthForHundredPercent * percent / 100.0f), 0);
247                if (barLength == 0 && number != 0) barLength = 1; // make sure there is a bar for all non-zero buckets
248                sb.append(fullLengthBar.substring(0, barLength));
249                if (number != 0) {
250                    sb.append(" ");
251                    sb.append(number);
252                    sb.append(" (");
253                    sb.append(new DecimalFormat("###.#").format(percent));
254                    sb.append("%)");
255                }
256                result.add(sb.toString());
257            }
258            return result;
259        }
260    
261        protected static <T> List<T> getBucketBoundaries( MathOperations<T> math, T lowerBound, T upperBound, T actualMinimum, T actualMaximum, int bucketCount, int bucketWidthSigFigs ) {
262            lowerBound = math.compare(lowerBound, actualMinimum) < 0 ? actualMinimum : lowerBound;
263            upperBound = math.compare(actualMaximum, upperBound) < 0 ? actualMaximum : upperBound;
264            if (math.compare(lowerBound, upperBound) == 0) {
265                List<T> boundaries = new ArrayList<T>();
266                boundaries.add(lowerBound);
267                boundaries.add(upperBound);
268                return boundaries;
269            }
270            final boolean extraLowerBucketNeeded = math.compare(lowerBound, actualMinimum) > 0;
271            final boolean extraUpperBucketNeeded = math.compare(actualMaximum, upperBound) > 0;
272            if (extraLowerBucketNeeded) --bucketCount;
273            if (extraUpperBucketNeeded) --bucketCount;
274    
275            // Compute the delta between the lower and upper bound ...
276            T totalWidth = math.subtract(upperBound, lowerBound);
277            int totalWidthScale = math.getExponentInScientificNotation(totalWidth);
278    
279            // Modify the lower bound by rounding down to the next lower meaningful value,
280            // using the scale of the totalWidth to determine how to round down.
281            T roundedLowerBound = math.roundDown(lowerBound, -totalWidthScale);
282            T roundedUpperBound = math.roundUp(upperBound, -totalWidthScale);
283    
284            // Create the ranges ...
285            double finalLowerBound = math.doubleValue(roundedLowerBound);
286            double finalUpperBound = math.doubleValue(roundedUpperBound);
287            double finalBucketCount = bucketCount;
288            double bucketWidth = (finalUpperBound - finalLowerBound) / finalBucketCount;
289    
290            // DoubleOperations doubleOps = new DoubleOperations();
291            // bucketWidth = doubleOps.keepSignificantFigures(bucketWidth,bucketWidthSigFigs);
292    
293            List<T> boundaries = new ArrayList<T>();
294            if (bucketWidth > 0.0d) {
295                if (extraLowerBucketNeeded) boundaries.add(actualMinimum);
296                double nextBoundary = finalLowerBound;
297                for (int i = 0; i != bucketCount; ++i) {
298                    boundaries.add(math.create(nextBoundary));
299                    nextBoundary = nextBoundary + bucketWidth;
300                    // nextBoundary = doubleOps.roundUp(nextBoundary + bucketWidth, bucketWidthSigFigs );
301                }
302                boundaries.add(roundedUpperBound);
303                if (extraUpperBucketNeeded) boundaries.add(actualMaximum);
304            }
305            return boundaries;
306        }
307    
308        /**
309         * Represents a bucket in a histogram.
310         */
311        public class Bucket implements Comparable<Bucket> {
312    
313            private final T lowerBound;
314            private final T upperBound;
315            private final T width;
316            private long numValues;
317    
318            protected Bucket( T lowerBound, T upperBound ) {
319                this.lowerBound = lowerBound;
320                this.upperBound = upperBound;
321                this.width = Histogram.this.math.subtract(upperBound, lowerBound);
322            }
323    
324            /**
325             * Get the lower bound of this bucket.
326             * @return the lower bound
327             */
328            public T getLowerBound() {
329                return lowerBound;
330            }
331    
332            /**
333             * Get the upper bound of this bucket.
334             * @return the upper bound
335             */
336            public T getUpperBound() {
337                return upperBound;
338            }
339    
340            /**
341             * Get the width of this bucket.
342             * @return the width
343             */
344            public T getWidth() {
345                return this.width;
346            }
347    
348            /**
349             * Return the percentage of values in the histogram that appear in this bucket.
350             * @return the percentage of all values in the histogram that appear in this bucket.
351             */
352            public float getPercentageOfValues() {
353                float total = Histogram.this.getTotalNumberOfValues();
354                if (total == 0.0f) return 0.0f;
355                float numValuesFloat = this.numValues;
356                return 100.0f * numValuesFloat / total;
357            }
358    
359            /**
360             * Add a value to this bucket
361             * @param value
362             */
363            protected void addValue( T value ) {
364                ++this.numValues;
365            }
366    
367            /**
368             * Get the number of values in this bucket.
369             * @return the number of values
370             */
371            public long getNumberOfValues() {
372                return this.numValues;
373            }
374    
375            /**
376             * Check whether the value fits in this bucket.
377             * @param value the value to check
378             * @param isLast
379             * @return 0 if the value fits in this bucket, -1 if the value fits in a prior bucket, or 1 if the value fits in a later
380             * bucket
381             */
382            public int checkValue( T value, boolean isLast ) {
383                if (Histogram.this.math.compare(this.lowerBound, value) > 0) return -1;
384                if (isLast) {
385                    if (Histogram.this.math.compare(value, this.upperBound) > 0) return 1;
386                } else {
387                    if (Histogram.this.math.compare(value, this.upperBound) >= 0) return 1;
388                }
389                return 0;
390            }
391    
392            public int compareTo( Bucket that ) {
393                // This is lower if 'that' has a lowerBound that is greater than 'this' lower bound ...
394                if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) < 0) return -1;
395                if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) > 0) return 1;
396                // The lower bounds are the same, so 'this' is lower if 'that' has an upperBound that is greater than 'this' lower
397                // bound ...
398                if (Histogram.this.math.compare(this.upperBound, that.upperBound) < 0) return -1;
399                if (Histogram.this.math.compare(this.upperBound, that.upperBound) > 0) return 1;
400                return 0;
401            }
402    
403            protected Class<T> getNumberClass() {
404                return Histogram.this.math.getOperandClass();
405            }
406    
407            @Override
408            public boolean equals( Object obj ) {
409                if (obj != null && obj.getClass() == this.getClass()) {
410                    Bucket that = (Bucket)obj;
411                    if (this.getNumberClass().isAssignableFrom(that.getNumberClass())) {
412                        if (Histogram.this.math.compare(this.lowerBound, that.lowerBound) != 0) return false;
413                        if (Histogram.this.math.compare(this.upperBound, that.upperBound) != 0) return false;
414                        if (Histogram.this.math.compare(this.width, that.width) != 0) return false;
415                        return true;
416                    }
417                }
418                return false;
419            }
420    
421            @Override
422            public String toString() {
423                return "[" + this.lowerBound + "," + this.upperBound + ")";
424            }
425    
426        }
427    
428        public abstract class BucketingStrategy {
429    
430            public List<T> getValues() {
431                return Histogram.this.values;
432            }
433    
434            public abstract T getLowerBound();
435    
436            public abstract T getUpperBound();
437        }
438    
439        public class DefaultBucketingStrategy extends BucketingStrategy {
440    
441            @Override
442            public T getLowerBound() {
443                if (getValues().isEmpty()) return Histogram.this.math.createZeroValue();
444                return getValues().get(0);
445            }
446    
447            @Override
448            public T getUpperBound() {
449                if (getValues().isEmpty()) return Histogram.this.math.createZeroValue();
450                return getValues().get(getValues().size() - 1);
451            }
452        }
453    
454        public class ExplicitBucketingStrategy extends BucketingStrategy {
455    
456            private final T lowerBound;
457            private final T upperBound;
458    
459            protected ExplicitBucketingStrategy( T lowerBound, T upperBound ) {
460                this.lowerBound = lowerBound;
461                this.upperBound = upperBound;
462            }
463    
464            @Override
465            public T getLowerBound() {
466                return this.lowerBound;
467            }
468    
469            @Override
470            public T getUpperBound() {
471                return this.upperBound;
472            }
473        }
474    
475        public class StandardDeviationBucketingStrategy extends BucketingStrategy {
476    
477            private final double median;
478            private final double standardDeviation;
479            private final int numberOfDeviationsAboveAndBelow;
480    
481            protected StandardDeviationBucketingStrategy( double median, double standardDeviation, int numDeviationsAboveAndBelow ) {
482                this.median = median;
483                this.standardDeviation = Math.abs(standardDeviation);
484                this.numberOfDeviationsAboveAndBelow = Math.abs(numDeviationsAboveAndBelow);
485            }
486    
487            @Override
488            public T getLowerBound() {
489                double lower = this.median - (standardDeviation * numberOfDeviationsAboveAndBelow);
490                return Histogram.this.math.create(lower);
491            }
492    
493            @Override
494            public T getUpperBound() {
495                double upper = this.median + (standardDeviation * numberOfDeviationsAboveAndBelow);
496                return Histogram.this.math.create(upper);
497            }
498        }
499    
500    }