001 /*
002 * JBoss, Home of Professional Open Source.
003 * Copyright 2008, Red Hat Middleware LLC, and individual contributors
004 * as indicated by the @author tags. See the copyright.txt file in the
005 * distribution for a full listing of individual contributors.
006 *
007 * This is free software; you can redistribute it and/or modify it
008 * under the terms of the GNU Lesser General Public License as
009 * published by the Free Software Foundation; either version 2.1 of
010 * the License, or (at your option) any later version.
011 *
012 * This software is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * You should have received a copy of the GNU Lesser General Public
018 * License along with this software; if not, write to the Free
019 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
021 */
022 package org.jboss.dna.common.stats;
023
024 import java.util.Collections;
025 import java.util.Comparator;
026 import java.util.LinkedList;
027 import java.util.List;
028 import java.util.concurrent.locks.Lock;
029 import net.jcip.annotations.ThreadSafe;
030 import org.jboss.dna.common.math.MathOperations;
031 import org.jboss.dna.common.text.Inflector;
032 import org.jboss.dna.common.util.StringUtil;
033
034 /**
035 * Encapsulation of the statistics for a series of values to which new values are frequently added. The statistics include the
036 * {@link #getMinimum() minimum}, {@link #getMaximum() maximum}, {@link #getTotal() total (aggregate sum)},
037 * {@link #getMean() mean (average)}, {@link #getMedian() median}, {@link #getStandardDeviation() standard deviation} and the
038 * {@link #getHistogram() histogram} of the values.
039 * <p>
040 * This class uses an efficient running calculation of the mean and standard deviation that is not as susceptible to roundoff
041 * errors as other traditional algorithms. The recursive algorithm is as follows, where M is the median value, sigma is the
042 * standard deviation, and S is a variable used in the calculation of sigma:
043 *
044 * <pre>
045 * M(1) = x(1)
046 * S(1) = 0
047 * M(k) = M(k-1) + ( x(k) - M(k-1) ) / k
048 * S(k) = S(k-1) + ( x(k) - M(k-1) ) * (x(k) - M(k))
049 * </pre>
050 *
051 * Then, the standard deviation for n values in x is
052 *
053 * <pre>
054 * sigma = sqrt(S(n) / n)
055 * </pre>
056 *
057 * </p>
058 * Unlike the other quantities, the median value (the value at which half of the values are greater and half the values are lower)
059 * cannot be calculated incrementally. Therefore, this class does record the values so that the median can be properly calculated.
060 * This fact should be kept in mind when performing statistics on large numbers of values.
061 * </p>
062 * <p>
063 * This class is threadsafe.
064 * </p>
065 * @param <T> the number type for these statistics
066 */
067 @ThreadSafe
068 public class DetailedStatistics<T extends Number> extends SimpleStatistics<T> {
069
070 private T median;
071 private Double medianValue;
072 private double s = 0.0d; // used in the calculation of standard deviation (sigma)
073 private double sigma = 0.0d;
074 private final List<T> values = new LinkedList<T>();
075 private final List<T> unmodifiableValues = Collections.unmodifiableList(this.values);
076 private Histogram<T> histogram;
077
078 public DetailedStatistics( MathOperations<T> operations ) {
079 super(operations);
080 this.medianValue = 0.0d;
081 this.median = this.math.createZeroValue();
082 }
083
084 /**
085 * Get the values that have been recorded in these statistics. The contents of this list may change if new values are
086 * {@link #add(Number) added} in another thread.
087 * @return the unmodifiable collection of values, in insertion order
088 */
089 public List<T> getValues() {
090 return this.unmodifiableValues;
091 }
092
093 @Override
094 protected void doAddValue( T value ) {
095 if (value == null) {
096 return;
097 }
098 double previousMean = this.getMeanValue();
099 super.doAddValue(value);
100 this.values.add(value);
101 this.medianValue = null;
102
103 // Calculate the mean and standard deviation ...
104 int count = getCount();
105 if (count == 1) {
106 this.s = 0.0d;
107 this.sigma = 0.0d;
108 } else {
109 double dValue = value.doubleValue();
110 double dCount = count;
111 // M(k) = M(k-1) + ( x(k) - M(k-1) ) / k
112 double meanValue = previousMean + ((dValue - previousMean) / dCount);
113 // S(k) = S(k-1) + ( x(k) - M(k-1) ) * ( x(k) - M(k) )
114 this.s = this.s + (dValue - previousMean) * (dValue - meanValue);
115 // sigma = sqrt( S(n) / (n-1) )
116 this.sigma = Math.sqrt(this.s / dCount);
117 }
118 }
119
120 /**
121 * Return the approximate mean (average) value represented as an instance of the operand type. Note that this may truncate if
122 * the operand type is not able to have the required precision. For the accurate mean, see {@link #getMedianValue() }.
123 * @return the mean (average), or 0.0 if the {@link #getCount() count} is 0
124 */
125 public T getMedian() {
126 getMedianValue();
127 return this.median;
128 }
129
130 /**
131 * Return the median value.
132 * @return the median value, or 0.0 if the {@link #getCount() count} is 0
133 * @see #getMedian()
134 */
135 public double getMedianValue() {
136 Lock lock = this.getLock().writeLock();
137 try {
138 lock.lock();
139 int count = this.values.size();
140 if (count == 0) {
141 return 0.0d;
142 }
143 if (this.medianValue == null) {
144 // Sort the values in numerical order..
145 Comparator<T> comparator = this.math.getComparator();
146 Collections.sort(this.values, comparator);
147 this.medianValue = 0.0d;
148 // If there is only one value, then the median is that value ...
149 if (count == 1) {
150 this.medianValue = this.values.get(0).doubleValue();
151 }
152 // If there is an odd number of values, find value that is in the middle ..
153 else if (count % 2 != 0) {
154 this.medianValue = this.values.get(((count + 1) / 2) - 1).doubleValue();
155 }
156 // Otherwise, there is an even number of values, so find the average of the middle two values ...
157 else {
158 int upperMiddleValueIndex = count / 2;
159 int lowerMiddleValueIndex = upperMiddleValueIndex - 1;
160 double lowerValue = this.values.get(lowerMiddleValueIndex).doubleValue();
161 double upperValue = this.values.get(upperMiddleValueIndex).doubleValue();
162 this.medianValue = (lowerValue + upperValue) / 2.0d;
163 }
164 this.median = this.math.create(this.medianValue);
165 this.histogram = null;
166 }
167 } finally {
168 lock.unlock();
169 }
170 return this.medianValue;
171 }
172
173 /**
174 * Return the standard deviation. The standard deviation is a measure of the variation in a series of values. Values with a
175 * lower standard deviation has less variance in the values than a series of values with a higher standard deviation.
176 * @return the standard deviation, or 0.0 if the {@link #getCount() count} is 0 or if all of the values are the same.
177 */
178 public double getStandardDeviation() {
179 Lock lock = this.getLock().readLock();
180 lock.lock();
181 try {
182 return this.sigma;
183 } finally {
184 lock.unlock();
185 }
186 }
187
188 /**
189 * Return the histogram of the {@link #getValues() values}. This method returns a histogram where all of the buckets are
190 * distributed normally and all have the same width. In this case, the 'numSigmas' should be set to 0. For other variations,
191 * see {@link #getHistogram(int)}.
192 * @return the histogram
193 * @see #getHistogram(int)
194 */
195 public Histogram<T> getHistogram() {
196 return getHistogram(0);
197 }
198
199 /**
200 * Return the histogram of the {@link #getValues() values}. This method is capable of creating two kinds of histograms. The
201 * first kind is a histogram where all of the buckets are distributed normally and all have the same width. In this case, the
202 * 'numSigmas' should be set to 0. See {@link #getHistogram()}.
203 * <p>
204 * The second kind of histogram is more useful when most of the data that is clustered near one value. This histogram is
205 * focused around the values that are up to 'numSigmas' above and below the {@link #getMedian() median}, and all values
206 * outside of this range are placed in the first and last bucket.
207 * </p>
208 * @param numSigmas the number of standard deviations from the {@link #getMedian() median}, or 0 if the buckets of the
209 * histogram should be evenly distributed
210 * @return the histogram
211 * @see #getHistogram()
212 */
213 public Histogram<T> getHistogram( int numSigmas ) {
214 Lock lock = this.getLock().writeLock();
215 lock.lock();
216 try {
217 Histogram<T> hist = new Histogram<T>(this.math, this.values);
218 if (numSigmas > 0) {
219 // The 'getMediaValue()' method will reset the current histogram, so don't set it...
220 hist.setStrategy(this.getMedianValue(), this.getStandardDeviation(), numSigmas);
221 }
222 this.histogram = hist;
223 return this.histogram;
224 } finally {
225 lock.unlock();
226 }
227 }
228
229 @Override
230 protected void doReset() {
231 super.doReset();
232 this.medianValue = 0.0d;
233 this.median = this.math.createZeroValue();
234 this.s = 0.0d;
235 this.sigma = 0.0d;
236 this.values.clear();
237 }
238
239 @Override
240 public String toString() {
241 int count = this.getCount();
242 String samples = Inflector.getInstance().pluralize("sample", count);
243 return StringUtil.createString("{0} {1}: min={2}; avg={3}; median={4}; stddev={5}; max={6}", count, samples, this.getMinimum(), this.getMean(), this.getMedian(), this.getStandardDeviation(),
244 this.getMaximum());
245 }
246
247 }