View Javadoc

1   /*
2    * ModeShape (http://www.modeshape.org)
3    * See the COPYRIGHT.txt file distributed with this work for information
4    * regarding copyright ownership.  Some portions may be licensed
5    * to Red Hat, Inc. under one or more contributor license agreements.
6    * See the AUTHORS.txt file in the distribution for a full listing of 
7    * individual contributors. 
8    *
9    * ModeShape is free software. Unless otherwise indicated, all code in ModeShape
10   * is licensed to you under the terms of the GNU Lesser General Public License as
11   * published by the Free Software Foundation; either version 2.1 of
12   * the License, or (at your option) any later version.
13   *
14   * ModeShape is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17   * Lesser General Public License for more details.
18   *
19   * You should have received a copy of the GNU Lesser General Public
20   * License along with this software; if not, write to the Free
21   * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
22   * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
23   */
24  package org.modeshape.common.statistic;
25  
26  import java.util.Collections;
27  import java.util.Comparator;
28  import java.util.LinkedList;
29  import java.util.List;
30  import java.util.concurrent.locks.Lock;
31  import net.jcip.annotations.ThreadSafe;
32  import org.modeshape.common.math.MathOperations;
33  import org.modeshape.common.text.Inflector;
34  import org.modeshape.common.util.StringUtil;
35  
36  /**
37   * Encapsulation of the statistics for a series of values to which new values are frequently added. The statistics include the
38   * {@link #getMinimum() minimum}, {@link #getMaximum() maximum}, {@link #getTotal() total (aggregate sum)},
39   * {@link #getMean() mean (average)}, {@link #getMedian() median}, {@link #getStandardDeviation() standard deviation} and the
40   * {@link #getHistogram() histogram} of the values.
41   * <p>
42   * This class uses an efficient running calculation of the mean and standard deviation that is not as susceptible to roundoff
43   * errors as other traditional algorithms. The recursive algorithm is as follows, where M is the median value, sigma is the
44   * standard deviation, and S is a variable used in the calculation of sigma:
45   * 
46   * <pre>
47   *   M(1) = x(1)
48   *   S(1) = 0
49   *   M(k) = M(k-1) + ( x(k) - M(k-1) ) / k
50   *   S(k) = S(k-1) + ( x(k) - M(k-1) ) * (x(k) - M(k))
51   * </pre>
52   * 
53   * Then, the standard deviation for n values in x is
54   * 
55   * <pre>
56   * sigma = sqrt(S(n) / n)
57   * </pre>
58   * 
59   * </p>
60   * Unlike the other quantities, the median value (the value at which half of the values are greater and half the values are lower)
61   * cannot be calculated incrementally. Therefore, this class does record the values so that the median can be properly calculated.
62   * This fact should be kept in mind when performing statistics on large numbers of values.
63   * </p>
64   * <p>
65   * This class is threadsafe.
66   * </p>
67   * @param <T> the number type for these statistics
68   */
69  @ThreadSafe
70  public class DetailedStatistics<T extends Number> extends SimpleStatistics<T> {
71  
72      private T median;
73      private Double medianValue;
74      private double s = 0.0d; // used in the calculation of standard deviation (sigma)
75      private double sigma = 0.0d;
76      private final List<T> values = new LinkedList<T>();
77      private final List<T> unmodifiableValues = Collections.unmodifiableList(this.values);
78      private Histogram<T> histogram;
79  
80      public DetailedStatistics( MathOperations<T> operations ) {
81          super(operations);
82          this.medianValue = 0.0d;
83          this.median = this.math.createZeroValue();
84      }
85  
86      /**
87       * Get the values that have been recorded in these statistics. The contents of this list may change if new values are
88       * {@link #add(Number) added} in another thread.
89       * @return the unmodifiable collection of values, in insertion order
90       */
91      public List<T> getValues() {
92          return this.unmodifiableValues;
93      }
94  
95      @Override
96      protected void doAddValue( T value ) {
97          if (value == null) {
98              return;
99          }
100         double previousMean = this.getMeanValue();
101         super.doAddValue(value);
102         this.values.add(value);
103         this.medianValue = null;
104 
105         // Calculate the mean and standard deviation ...
106         int count = getCount();
107         if (count == 1) {
108             this.s = 0.0d;
109             this.sigma = 0.0d;
110         } else {
111             double dValue = value.doubleValue();
112             double dCount = count;
113             // M(k) = M(k-1) + ( x(k) - M(k-1) ) / k
114             double meanValue = previousMean + ((dValue - previousMean) / dCount);
115             // S(k) = S(k-1) + ( x(k) - M(k-1) ) * ( x(k) - M(k) )
116             this.s = this.s + (dValue - previousMean) * (dValue - meanValue);
117             // sigma = sqrt( S(n) / (n-1) )
118             this.sigma = Math.sqrt(this.s / dCount);
119         }
120     }
121 
122     /**
123      * Return the approximate mean (average) value represented as an instance of the operand type. Note that this may truncate if
124      * the operand type is not able to have the required precision. For the accurate mean, see {@link #getMedianValue() }.
125      * @return the mean (average), or 0.0 if the {@link #getCount() count} is 0
126      */
127     public T getMedian() {
128         getMedianValue();
129         return this.median;
130     }
131 
132     /**
133      * Return the median value.
134      * @return the median value, or 0.0 if the {@link #getCount() count} is 0
135      * @see #getMedian()
136      */
137     public double getMedianValue() {
138         Lock lock = this.getLock().writeLock();
139         try {
140             lock.lock();
141             int count = this.values.size();
142             if (count == 0) {
143                 return 0.0d;
144             }
145             if (this.medianValue == null) {
146                 // Sort the values in numerical order..
147                 Comparator<T> comparator = this.math.getComparator();
148                 Collections.sort(this.values, comparator);
149                 this.medianValue = 0.0d;
150                 // If there is only one value, then the median is that value ...
151                 if (count == 1) {
152                     this.medianValue = this.values.get(0).doubleValue();
153                 }
154                 // If there is an odd number of values, find value that is in the middle ..
155                 else if (count % 2 != 0) {
156                     this.medianValue = this.values.get(((count + 1) / 2) - 1).doubleValue();
157                 }
158                 // Otherwise, there is an even number of values, so find the average of the middle two values ...
159                 else {
160                     int upperMiddleValueIndex = count / 2;
161                     int lowerMiddleValueIndex = upperMiddleValueIndex - 1;
162                     double lowerValue = this.values.get(lowerMiddleValueIndex).doubleValue();
163                     double upperValue = this.values.get(upperMiddleValueIndex).doubleValue();
164                     this.medianValue = (lowerValue + upperValue) / 2.0d;
165                 }
166                 this.median = this.math.create(this.medianValue);
167                 this.histogram = null;
168             }
169         } finally {
170             lock.unlock();
171         }
172         return this.medianValue;
173     }
174 
175     /**
176      * Return the standard deviation. The standard deviation is a measure of the variation in a series of values. Values with a
177      * lower standard deviation has less variance in the values than a series of values with a higher standard deviation.
178      * @return the standard deviation, or 0.0 if the {@link #getCount() count} is 0 or if all of the values are the same.
179      */
180     public double getStandardDeviation() {
181         Lock lock = this.getLock().readLock();
182         lock.lock();
183         try {
184             return this.sigma;
185         } finally {
186             lock.unlock();
187         }
188     }
189 
190     /**
191      * Return the histogram of the {@link #getValues() values}. This method returns a histogram where all of the buckets are
192      * distributed normally and all have the same width. In this case, the 'numSigmas' should be set to 0. For other variations,
193      * see {@link #getHistogram(int)}.
194      * @return the histogram
195      * @see #getHistogram(int)
196      */
197     public Histogram<T> getHistogram() {
198         return getHistogram(0);
199     }
200 
201     /**
202      * Return the histogram of the {@link #getValues() values}. This method is capable of creating two kinds of histograms. The
203      * first kind is a histogram where all of the buckets are distributed normally and all have the same width. In this case, the
204      * 'numSigmas' should be set to 0. See {@link #getHistogram()}.
205      * <p>
206      * The second kind of histogram is more useful when most of the data that is clustered near one value. This histogram is
207      * focused around the values that are up to 'numSigmas' above and below the {@link #getMedian() median}, and all values
208      * outside of this range are placed in the first and last bucket.
209      * </p>
210      * @param numSigmas the number of standard deviations from the {@link #getMedian() median}, or 0 if the buckets of the
211      * histogram should be evenly distributed
212      * @return the histogram
213      * @see #getHistogram()
214      */
215     public Histogram<T> getHistogram( int numSigmas ) {
216         Lock lock = this.getLock().writeLock();
217         lock.lock();
218         try {
219             Histogram<T> hist = new Histogram<T>(this.math, this.values);
220             if (numSigmas > 0) {
221                 // The 'getMediaValue()' method will reset the current histogram, so don't set it...
222                 hist.setStrategy(this.getMedianValue(), this.getStandardDeviation(), numSigmas);
223             }
224             this.histogram = hist;
225             return this.histogram;
226         } finally {
227             lock.unlock();
228         }
229     }
230 
231     @Override
232     protected void doReset() {
233         super.doReset();
234         this.medianValue = 0.0d;
235         this.median = this.math.createZeroValue();
236         this.s = 0.0d;
237         this.sigma = 0.0d;
238         this.values.clear();
239     }
240 
241     @Override
242     public String toString() {
243         int count = this.getCount();
244         String samples = Inflector.getInstance().pluralize("sample", count);
245         return StringUtil.createString("{0} {1}: min={2}; avg={3}; median={4}; stddev={5}; max={6}", count, samples, this.getMinimum(), this.getMean(), this.getMedian(), this.getStandardDeviation(),
246                                        this.getMaximum());
247     }
248 
249 }