001 /*
002 * JBoss DNA (http://www.jboss.org/dna)
003 * See the COPYRIGHT.txt file distributed with this work for information
004 * regarding copyright ownership. Some portions may be licensed
005 * to Red Hat, Inc. under one or more contributor license agreements.
006 * See the AUTHORS.txt file in the distribution for a full listing of
007 * individual contributors.
008 *
009 * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
010 * is licensed to you under the terms of the GNU Lesser General Public License as
011 * published by the Free Software Foundation; either version 2.1 of
012 * the License, or (at your option) any later version.
013 *
014 * JBoss DNA is distributed in the hope that it will be useful,
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
017 * Lesser General Public License for more details.
018 *
019 * You should have received a copy of the GNU Lesser General Public
020 * License along with this software; if not, write to the Free
021 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
022 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
023 */
024 package org.jboss.dna.common.statistic;
025
026 import java.util.Collections;
027 import java.util.Comparator;
028 import java.util.LinkedList;
029 import java.util.List;
030 import java.util.concurrent.locks.Lock;
031 import net.jcip.annotations.ThreadSafe;
032 import org.jboss.dna.common.math.MathOperations;
033 import org.jboss.dna.common.text.Inflector;
034 import org.jboss.dna.common.util.StringUtil;
035
036 /**
037 * Encapsulation of the statistics for a series of values to which new values are frequently added. The statistics include the
038 * {@link #getMinimum() minimum}, {@link #getMaximum() maximum}, {@link #getTotal() total (aggregate sum)},
039 * {@link #getMean() mean (average)}, {@link #getMedian() median}, {@link #getStandardDeviation() standard deviation} and the
040 * {@link #getHistogram() histogram} of the values.
041 * <p>
042 * This class uses an efficient running calculation of the mean and standard deviation that is not as susceptible to roundoff
043 * errors as other traditional algorithms. The recursive algorithm is as follows, where M is the median value, sigma is the
044 * standard deviation, and S is a variable used in the calculation of sigma:
045 *
046 * <pre>
047 * M(1) = x(1)
048 * S(1) = 0
049 * M(k) = M(k-1) + ( x(k) - M(k-1) ) / k
050 * S(k) = S(k-1) + ( x(k) - M(k-1) ) * (x(k) - M(k))
051 * </pre>
052 *
053 * Then, the standard deviation for n values in x is
054 *
055 * <pre>
056 * sigma = sqrt(S(n) / n)
057 * </pre>
058 *
059 * </p>
060 * Unlike the other quantities, the median value (the value at which half of the values are greater and half the values are lower)
061 * cannot be calculated incrementally. Therefore, this class does record the values so that the median can be properly calculated.
062 * This fact should be kept in mind when performing statistics on large numbers of values.
063 * </p>
064 * <p>
065 * This class is threadsafe.
066 * </p>
067 * @param <T> the number type for these statistics
068 */
069 @ThreadSafe
070 public class DetailedStatistics<T extends Number> extends SimpleStatistics<T> {
071
072 private T median;
073 private Double medianValue;
074 private double s = 0.0d; // used in the calculation of standard deviation (sigma)
075 private double sigma = 0.0d;
076 private final List<T> values = new LinkedList<T>();
077 private final List<T> unmodifiableValues = Collections.unmodifiableList(this.values);
078 private Histogram<T> histogram;
079
080 public DetailedStatistics( MathOperations<T> operations ) {
081 super(operations);
082 this.medianValue = 0.0d;
083 this.median = this.math.createZeroValue();
084 }
085
086 /**
087 * Get the values that have been recorded in these statistics. The contents of this list may change if new values are
088 * {@link #add(Number) added} in another thread.
089 * @return the unmodifiable collection of values, in insertion order
090 */
091 public List<T> getValues() {
092 return this.unmodifiableValues;
093 }
094
095 @Override
096 protected void doAddValue( T value ) {
097 if (value == null) {
098 return;
099 }
100 double previousMean = this.getMeanValue();
101 super.doAddValue(value);
102 this.values.add(value);
103 this.medianValue = null;
104
105 // Calculate the mean and standard deviation ...
106 int count = getCount();
107 if (count == 1) {
108 this.s = 0.0d;
109 this.sigma = 0.0d;
110 } else {
111 double dValue = value.doubleValue();
112 double dCount = count;
113 // M(k) = M(k-1) + ( x(k) - M(k-1) ) / k
114 double meanValue = previousMean + ((dValue - previousMean) / dCount);
115 // S(k) = S(k-1) + ( x(k) - M(k-1) ) * ( x(k) - M(k) )
116 this.s = this.s + (dValue - previousMean) * (dValue - meanValue);
117 // sigma = sqrt( S(n) / (n-1) )
118 this.sigma = Math.sqrt(this.s / dCount);
119 }
120 }
121
122 /**
123 * Return the approximate mean (average) value represented as an instance of the operand type. Note that this may truncate if
124 * the operand type is not able to have the required precision. For the accurate mean, see {@link #getMedianValue() }.
125 * @return the mean (average), or 0.0 if the {@link #getCount() count} is 0
126 */
127 public T getMedian() {
128 getMedianValue();
129 return this.median;
130 }
131
132 /**
133 * Return the median value.
134 * @return the median value, or 0.0 if the {@link #getCount() count} is 0
135 * @see #getMedian()
136 */
137 public double getMedianValue() {
138 Lock lock = this.getLock().writeLock();
139 try {
140 lock.lock();
141 int count = this.values.size();
142 if (count == 0) {
143 return 0.0d;
144 }
145 if (this.medianValue == null) {
146 // Sort the values in numerical order..
147 Comparator<T> comparator = this.math.getComparator();
148 Collections.sort(this.values, comparator);
149 this.medianValue = 0.0d;
150 // If there is only one value, then the median is that value ...
151 if (count == 1) {
152 this.medianValue = this.values.get(0).doubleValue();
153 }
154 // If there is an odd number of values, find value that is in the middle ..
155 else if (count % 2 != 0) {
156 this.medianValue = this.values.get(((count + 1) / 2) - 1).doubleValue();
157 }
158 // Otherwise, there is an even number of values, so find the average of the middle two values ...
159 else {
160 int upperMiddleValueIndex = count / 2;
161 int lowerMiddleValueIndex = upperMiddleValueIndex - 1;
162 double lowerValue = this.values.get(lowerMiddleValueIndex).doubleValue();
163 double upperValue = this.values.get(upperMiddleValueIndex).doubleValue();
164 this.medianValue = (lowerValue + upperValue) / 2.0d;
165 }
166 this.median = this.math.create(this.medianValue);
167 this.histogram = null;
168 }
169 } finally {
170 lock.unlock();
171 }
172 return this.medianValue;
173 }
174
175 /**
176 * Return the standard deviation. The standard deviation is a measure of the variation in a series of values. Values with a
177 * lower standard deviation has less variance in the values than a series of values with a higher standard deviation.
178 * @return the standard deviation, or 0.0 if the {@link #getCount() count} is 0 or if all of the values are the same.
179 */
180 public double getStandardDeviation() {
181 Lock lock = this.getLock().readLock();
182 lock.lock();
183 try {
184 return this.sigma;
185 } finally {
186 lock.unlock();
187 }
188 }
189
190 /**
191 * Return the histogram of the {@link #getValues() values}. This method returns a histogram where all of the buckets are
192 * distributed normally and all have the same width. In this case, the 'numSigmas' should be set to 0. For other variations,
193 * see {@link #getHistogram(int)}.
194 * @return the histogram
195 * @see #getHistogram(int)
196 */
197 public Histogram<T> getHistogram() {
198 return getHistogram(0);
199 }
200
201 /**
202 * Return the histogram of the {@link #getValues() values}. This method is capable of creating two kinds of histograms. The
203 * first kind is a histogram where all of the buckets are distributed normally and all have the same width. In this case, the
204 * 'numSigmas' should be set to 0. See {@link #getHistogram()}.
205 * <p>
206 * The second kind of histogram is more useful when most of the data that is clustered near one value. This histogram is
207 * focused around the values that are up to 'numSigmas' above and below the {@link #getMedian() median}, and all values
208 * outside of this range are placed in the first and last bucket.
209 * </p>
210 * @param numSigmas the number of standard deviations from the {@link #getMedian() median}, or 0 if the buckets of the
211 * histogram should be evenly distributed
212 * @return the histogram
213 * @see #getHistogram()
214 */
215 public Histogram<T> getHistogram( int numSigmas ) {
216 Lock lock = this.getLock().writeLock();
217 lock.lock();
218 try {
219 Histogram<T> hist = new Histogram<T>(this.math, this.values);
220 if (numSigmas > 0) {
221 // The 'getMediaValue()' method will reset the current histogram, so don't set it...
222 hist.setStrategy(this.getMedianValue(), this.getStandardDeviation(), numSigmas);
223 }
224 this.histogram = hist;
225 return this.histogram;
226 } finally {
227 lock.unlock();
228 }
229 }
230
231 @Override
232 protected void doReset() {
233 super.doReset();
234 this.medianValue = 0.0d;
235 this.median = this.math.createZeroValue();
236 this.s = 0.0d;
237 this.sigma = 0.0d;
238 this.values.clear();
239 }
240
241 @Override
242 public String toString() {
243 int count = this.getCount();
244 String samples = Inflector.getInstance().pluralize("sample", count);
245 return StringUtil.createString("{0} {1}: min={2}; avg={3}; median={4}; stddev={5}; max={6}", count, samples, this.getMinimum(), this.getMean(), this.getMedian(), this.getStandardDeviation(),
246 this.getMaximum());
247 }
248
249 }