View Javadoc

1   /*
2    * ModeShape (http://www.modeshape.org)
3    * See the COPYRIGHT.txt file distributed with this work for information
4    * regarding copyright ownership.  Some portions may be licensed
5    * to Red Hat, Inc. under one or more contributor license agreements.
6    * See the AUTHORS.txt file in the distribution for a full listing of 
7    * individual contributors.
8    *
9    * ModeShape is free software. Unless otherwise indicated, all code in ModeShape
10   * is licensed to you under the terms of the GNU Lesser General Public License as
11   * published by the Free Software Foundation; either version 2.1 of
12   * the License, or (at your option) any later version.
13   * 
14   * ModeShape is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17   * Lesser General Public License for more details.
18   *
19   * You should have received a copy of the GNU Lesser General Public
20   * License along with this software; if not, write to the Free
21   * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
22   * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
23   */
24  package org.modeshape.search.lucene;
25  
26  import java.math.BigDecimal;
27  import java.math.BigInteger;
28  
/**
 * Utility for working with Lucene field values. The methods here convert {@link BigDecimal} values to and from a canonical
 * string form that is intended to be compared lexicographically. All methods are static and stateless; the class cannot be
 * instantiated.
 */
public class FieldUtil {

    /**
     * Creates a canonical string representation of the supplied {@link BigDecimal} value, whereby all string representations are
     * lexicographically sortable. This makes it possible to store the wide range of values that can be represented by BigDecimal,
     * while still enabling sorting and range queries.
     * <p>
     * This canonical form represents all decimal values using a prescribed format, which is based upon <a
     * href="http://www.mail-archive.com/java-user@lucene.apache.org/msg23632.html">Steven Rowe's suggestion</a> but with
     * modifications to handle variable-length exponents (per his suggestion in the last sentence), use spaces between fields
     * only where required (for minimal length), and utilize an optimized (e.g., shorter) form when the value is '0' or the
     * exponent is '0'. Positive values contain only digits (e.g., '0'..'9'), the '-' character and a space; negative values
     * additionally end with the 'A' sentinel.
     * 
     * <pre>
     *     &lt;significand-sign>&lt;exponent-sign>&lt;exponent-length> &lt;exponent>&lt;significand>
     * </pre>
     * 
     * where:
     * <ul>
     * <li>the <code>&lt;significand-sign></code> is '-' if the significand is negative, '0' if equal to zero, or '1' if positive;
     * </li>
     * <li>the <code>&lt;exponent-sign></code> is '-' if the exponent is negative, '0' if equal to zero, or '1' if positive; if
     * '0', then the <code>&lt;exponent-length></code> and <code>&lt;exponent></code> fields are not written;</li>
     * <li>the <code>&lt;exponent-length></code> is the positive value representing the length of the <code>&lt;exponent></code>,
     * and is not included when the <code>&lt;exponent-sign></code> is '0';</li>
     * <li>the <code>&lt;exponent></code> is the integer used to define the number of factors of 10 that are applied to the
     * significand, obtained by computing <code>value.precision() - value.scale() - 1</code>; a negative exponent is written as
     * the digit-wise complement of its absolute value;</li>
     * <li>the <code>&lt;significand></code> is the part of the number containing the significant figures, and is a (big) integer
     * value obtained from the BigDecimal using {@link BigDecimal#unscaledValue()}, with any trailing zeros removed;</li>
     * </ul>
     * In the case of a negative significand, the <code>&lt;significand></code> field is negated such that each digit is replaced
     * with <code>(base - digit - 1)</code> and appended by 'A' (which is greater than all other digits) to ensure that
     * significands with greater precision are ordered before those that share significand prefixes but have lesser precision.
     * </p>
     * <p>
     * Thus, the format for a negative BigDecimal value becomes:
     * 
     * <pre>
     *     -&lt;reversed-exponent-sign>&lt;negated-exponent-length> &lt;negated-exponent>&lt;significand>&lt;sentinel>
     * </pre>
     * 
     * where the <code>&lt;sentinel></code> is always 'A'. Note that the exponent length field is also negated.
     * </p>
     * <h3>Examples</h3>
     * <p>
     * Here are several examples that show BigDecimal values and their corresponding canonical string representation:
     * 
     * <pre>
     *    +5.E-3     => 1-1 65
     *    +1.E-2     => 1-1 71
     *    +1.0E-2    => 1-1 71
     *    +1.0000E-2 => 1-1 71
     *    +1.1E-2    => 1-1 711
     *    +1.11E-2   => 1-1 7111
     *    +1.2E-2    => 1-1 712
     *    +5.E-2     => 1-1 75
     *    +7.3E+2    => 111 273
     *    +7.4E+2    => 111 274
     *    +7.45E+2   => 111 2745
     *    +8.7654E+3 => 111 387654
     * </pre>
     * 
     * Here is how a BigDecimal value of {@link BigDecimal#ZERO zero} is represented:
     * 
     * <pre>
     *     0.0E0     => 0
     * </pre>
     * 
     * BigDecimal values with an exponent of '0' are represented as follows:
     * 
     * <pre>
     *    +1.2E0     => 1012
     *    -1.2E0     => -087A
     * </pre>
     * 
     * And here are some negative value examples:
     * 
     * <pre>
     *    -8.7654E+3 => --8 612345A
     *    -7.45E+2   => --8 7254A
     *    -7.4E+2    => --8 725A
     *    -7.3E+2    => --8 726A
     *    -5.E-2     => -18 24A
     *    -1.2E-2    => -18 287A
     *    -1.11E-2   => -18 2888A
     *    -1.1E-2    => -18 288A
     *    -1.0000E-2 => -18 28A
     *    -1.0E-2    => -18 28A
     *    -1.E-2     => -18 28A
     *    -5.E-3     => -18 34A
     *    -5.E-4     => -18 44A
     * </pre>
     * 
     * </p>
     * <p>
     * This canonical form is valid for all values of {@link BigDecimal}.
     * </p>
     * <p>
     * NOTE(review): the <code>&lt;exponent-length></code> field is complemented according to the sign of the <i>value</i>, not
     * the sign of the emitted exponent. As a result, for magnitudes smaller than 1 whose exponents have a different number of
     * digits, the lexicographic order does not match the numeric order (e.g., 1E-3 is written as "1-1 61" while 1E-10 is written
     * as "1-2 891", which sorts after it). The format is deliberately left unchanged here so that previously stored strings can
     * still be read by {@link #stringToDecimal(String)}.
     * </p>
     * 
     * @param value the value to be converted into its canonical form; may not be null
     * @return the canonical string representation; never null or empty
     * @see #stringToDecimal(String)
     */
    public static String decimalToString( BigDecimal value ) {
        StringBuilder sb = new StringBuilder();
        boolean negate = false;
        // <significand-sign> field
        switch (value.signum()) {
            case -1:
                sb.append('-');
                negate = true;
                break;
            case 1:
                sb.append('1');
                break;
            default:
                // Zero has a fixed, minimal representation
                return "0";
        }

        // <exponent-sign>, <exponent-length> and <exponent> fields.
        // Widen to long BEFORE subtracting: precision() and scale() are ints, and a strongly negative scale would otherwise
        // overflow the int subtraction and flip the sign of the exponent.
        long exponent = (long)value.precision() - value.scale() - 1;
        if (exponent == 0) {
            sb.append('0');
        } else {
            // For negative values the exponent's sign is reversed so that larger magnitudes sort first
            if (negate) exponent = -exponent;
            String exponentField = String.valueOf(Math.abs(exponent));
            int length = exponentField.length();
            char sign = exponent > 0 ? '1' : '-';
            // A negative exponent is written as the digit-wise complement of its absolute value
            if (exponent < 0) exponentField = negate(exponentField);
            // <exponent-length>
            String lengthField = String.valueOf(length);
            if (negate) lengthField = negate(lengthField);
            sb.append(sign).append(lengthField).append(' ').append(exponentField);
        }

        // <significand>
        if (negate) value = value.negate();
        StringBuilder significand = new StringBuilder(value.unscaledValue().toString());
        // The value is non-zero here, so the significand always keeps at least one non-zero digit
        removeTralingZeros(significand);

        // Append the significand (and the sentinel character)...
        sb.append(negate ? negate(significand).append('A') : significand);

        return sb.toString();
    }

    /**
     * Converts the canonical string representation of a {@link BigDecimal} value into the object form.
     * <p>
     * See {@link #decimalToString(BigDecimal)} for documentation of the canonical form. Because trailing zeros of the
     * significand are not stored in the canonical form, the result is numerically equal to the originally encoded value (per
     * {@link BigDecimal#compareTo(BigDecimal)}) but may have a different scale.
     * </p>
     * 
     * @param value the canonical string representation; may not be null or empty
     * @return the BigDecimal representation; never null
     * @see #decimalToString(BigDecimal)
     */
    public static BigDecimal stringToDecimal( String value ) {
        assert value != null;
        assert value.length() != 0;
        if ("0".equals(value)) return BigDecimal.ZERO;

        boolean negate = false;
        if (value.charAt(0) == '-') {
            // Negative, so remove the trailing sentinel ...
            assert value.charAt(value.length() - 1) == 'A';
            value = value.substring(0, value.length() - 1);
            negate = true;
        }

        // <exponent-sign>, <exponent-length> and <exponent> fields
        long exponent = 0L;
        boolean negateExponent = false;
        int endIndex = 0;
        switch (value.charAt(1)) {
            case '0':
                // Exponent of zero: no length or exponent fields, so the significand starts right after the two sign fields
                value = value.substring(2);
                break;
            case '-':
                negateExponent = true;
                // $FALL-THROUGH$
            case '1':
            default:
                // Read in the <exponent-length> (complemented when the value is negative)
                int indexOfSpace = value.indexOf(' ', 2);
                String lengthField = value.substring(2, indexOfSpace);
                if (negate) lengthField = negate(lengthField);
                int lengthOfExponent = Integer.parseInt(lengthField);
                // Read in the <exponent> (after the space) ...
                int startIndex = indexOfSpace + 1;
                endIndex = startIndex + lengthOfExponent;
                String exponentField = value.substring(startIndex, endIndex);
                // A '-' exponent sign means the digits were stored complemented
                exponent = Long.parseLong(negateExponent ? negate(exponentField) : exponentField);
                // For negative values the exponent sign was reversed when written, so reverse it back
                if (negate) negateExponent = !negateExponent;
                if (negateExponent) exponent = -exponent;
                value = value.substring(endIndex);
        }

        // <significand> (stored complemented for negative values)
        if (negate) {
            value = negate(value);
        }
        BigInteger significand = new BigInteger(value);
        // Inverse of 'exponent = precision - scale - 1', using the number of stored significand digits as the precision
        int scale = (int)(value.length() - exponent - 1);

        // Now create the result ...
        return new BigDecimal(negate ? significand.negate() : significand, scale);
    }

    /**
     * Compute the "negated" string, which replaces the digits (0 becomes 9, 1 becomes 8, ... and 9 becomes 0). Space and '-'
     * characters are left as they are.
     * 
     * @param value the input string; may not be null
     * @return the negated string; never null
     * @see #negate(StringBuilder)
     */
    protected static String negate( String value ) {
        return negate(new StringBuilder(value)).toString();
    }

    /**
     * Compute the "negated" string, which replaces the digits (0 becomes 9, 1 becomes 8, ... and 9 becomes 0). Space and '-'
     * characters are left as they are. The supplied builder is modified in place.
     * 
     * @param value the input string builder, which is modified by this method; may not be null
     * @return the same builder instance that was supplied, now containing the negated string; never null
     * @see #negate(String)
     */
    protected static StringBuilder negate( StringBuilder value ) {
        for (int i = 0, len = value.length(); i != len; ++i) {
            char c = value.charAt(i);
            if (c == ' ' || c == '-') continue;
            // Digit-wise nines' complement: '0' <-> '9', '1' <-> '8', ...
            value.setCharAt(i, (char)('9' - c + '0'));
        }
        return value;
    }

    /**
     * Utility to remove the trailing 0's. If the builder is empty it is left untouched; if it contains nothing but '0'
     * characters, all of them are removed and the builder becomes empty.
     * 
     * @param sb the input string builder, which is modified in place; may not be null
     */
    protected static void removeTralingZeros( StringBuilder sb ) {
        int endIndex = sb.length();
        if (endIndex > 0) {
            --endIndex;
            int index = endIndex;
            // Stop at the start of the buffer so that an all-zero input cannot run past index 0
            while (index >= 0 && sb.charAt(index) == '0') {
                --index;
            }
            if (index < endIndex) sb.delete(index + 1, endIndex + 1);
        }
    }

    /* Prevent instantiation */
    private FieldUtil() {
    }
}