1 /*
2 * ModeShape (http://www.modeshape.org)
3 * See the COPYRIGHT.txt file distributed with this work for information
4 * regarding copyright ownership. Some portions may be licensed
5 * to Red Hat, Inc. under one or more contributor license agreements.
6 * See the AUTHORS.txt file in the distribution for a full listing of
7 * individual contributors.
8 *
9 * ModeShape is free software. Unless otherwise indicated, all code in ModeShape
10 * is licensed to you under the terms of the GNU Lesser General Public License as
11 * published by the Free Software Foundation; either version 2.1 of
12 * the License, or (at your option) any later version.
13 *
14 * ModeShape is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this software; if not, write to the Free
21 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
22 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
23 */
24 package org.modeshape.search.lucene;
25
26 import java.math.BigDecimal;
27 import java.math.BigInteger;
28
/**
 * Utility for working with Lucene field values.
 */
public class FieldUtil {

    /** The character appended to the negated significand of every negative value; it sorts after all digits. */
    private static final char NEGATIVE_SENTINEL = 'A';

    /**
     * Creates a canonical string representation of the supplied {@link BigDecimal} value, whereby all string representations are
     * lexicographically sortable. This makes it possible to store the wide range of values that can be represented by BigDecimal,
     * while still enabling sorting and range queries.
     * <p>
     * This canonical form represents all decimal values using a prescribed format, which is based upon <a
     * href="http://www.mail-archive.com/java-user@lucene.apache.org/msg23632.html">Steven Rowe's suggestion</a> but with
     * modifications to handle variable-length exponents (per his suggestion in the last sentence), use a space between fields
     * only where required (for minimal length), and utilize an optimized (e.g., shorter) form when the value is '0' or the
     * exponent is '0'. Thus, this format contains only digits (e.g., '0'..'9'), the '-' character, at most one space, and (for
     * negative values only) a trailing 'A' sentinel.
     * </p>
     *
     * <pre>
     *    &lt;significand-sign&gt;&lt;exponent-sign&gt;&lt;exponent-length&gt; &lt;exponent&gt;&lt;significand&gt;
     * </pre>
     *
     * where:
     * <ul>
     * <li>the <b>significand</b> is the part of the number containing the significant figures, and is a (big) integer value
     * obtained from the BigDecimal using {@link BigDecimal#unscaledValue()}, with any trailing zeros removed;</li>
     * <li>the <b>exponent</b> is the integer used to define the number of factors of 10 that are applied to the significand,
     * obtained by computing <code>value.precision() - value.scale() - 1</code>;</li>
     * </ul>
     * Thus the fields are defined as:
     * <ul>
     * <li>the <code>&lt;significand-sign&gt;</code> is '-' if the significand is negative, '0' if equal to zero, or '1' if
     * positive;</li>
     * <li>the <code>&lt;exponent-sign&gt;</code> is '-' if the exponent is negative, '0' if equal to zero, or '1' if positive; if
     * '0', then the <code>&lt;exponent-length&gt;</code> and <code>&lt;exponent&gt;</code> fields are not written;</li>
     * <li>the <code>&lt;exponent-length&gt;</code> is the positive value representing the length of the
     * <code>&lt;exponent&gt;</code>, and is not included when the <code>&lt;exponent-sign&gt;</code> is '0';</li>
     * <li>the <code>&lt;exponent&gt;</code> is the absolute value of the exponent (with each digit negated when the exponent
     * sign is '-'), and is not included when the <code>&lt;exponent-sign&gt;</code> is '0';</li>
     * <li>the <code>&lt;significand&gt;</code> is the absolute value of the significand, without trailing zeros.</li>
     * </ul>
     * <p>
     * In the case of a negative significand, the <code>&lt;significand&gt;</code> field is negated such that each digit is
     * replaced with <code>(base - digit - 1)</code> and appended by 'A' (which is greater than all other digits) to ensure that
     * significands with greater precision are ordered before those that share significand prefixes but have lesser precision.
     * </p>
     * <p>
     * Thus, the format for a negative BigDecimal value becomes:
     * </p>
     *
     * <pre>
     *    -&lt;reversed-exponent-sign&gt;&lt;negated-exponent-length&gt; &lt;negated-exponent&gt;&lt;negated-significand&gt;&lt;sentinel&gt;
     * </pre>
     *
     * where the <code>&lt;sentinel&gt;</code> is always 'A'. Note that the exponent length field is also negated.
     * <h3>Examples</h3>
     * <p>
     * Here are several examples that show BigDecimal values and their corresponding canonical string representation:
     * </p>
     *
     * <pre>
     *    +5.E-3       =&gt;  1-1 65
     *    +1.E-2       =&gt;  1-1 71
     *    +1.0E-2      =&gt;  1-1 71
     *    +1.0000E-2   =&gt;  1-1 71
     *    +1.1E-2      =&gt;  1-1 711
     *    +1.11E-2     =&gt;  1-1 7111
     *    +1.2E-2      =&gt;  1-1 712
     *    +5.E-2       =&gt;  1-1 75
     *    +7.3E+2      =&gt;  111 273
     *    +7.4E+2      =&gt;  111 274
     *    +7.45E+2     =&gt;  111 2745
     *    +8.7654E+3   =&gt;  111 387654
     * </pre>
     *
     * Here is how a BigDecimal value of {@link BigDecimal#ZERO zero} is represented:
     *
     * <pre>
     *     0.0E0       =&gt;  0
     * </pre>
     *
     * BigDecimal values with an exponent of '0' are represented as follows:
     *
     * <pre>
     *    +1.2E0       =&gt;  1012
     *    -1.2E0       =&gt;  -087A
     * </pre>
     *
     * And here are some negative value examples:
     *
     * <pre>
     *    -8.7654E+3   =&gt;  --8 612345A
     *    -7.45E+2     =&gt;  --8 7254A
     *    -7.4E+2      =&gt;  --8 725A
     *    -7.3E+2      =&gt;  --8 726A
     *    -5.E-2       =&gt;  -18 24A
     *    -1.2E-2      =&gt;  -18 287A
     *    -1.11E-2     =&gt;  -18 2888A
     *    -1.1E-2      =&gt;  -18 288A
     *    -1.0000E-2   =&gt;  -18 28A
     *    -1.0E-2      =&gt;  -18 28A
     *    -1.E-2       =&gt;  -18 28A
     *    -5.E-3       =&gt;  -18 34A
     *    -5.E-4       =&gt;  -18 44A
     * </pre>
     *
     * <p>
     * Because trailing zeros of the significand are not written, values that are numerically equal but differ only in scale
     * (e.g., <code>1.0E-2</code> and <code>1.0000E-2</code>) share the same canonical form.
     * </p>
     *
     * @param value the value to be converted into its canonical form; may not be null
     * @return the canonical string representation; never null or empty
     * @see #stringToDecimal(String)
     */
    public static String decimalToString( BigDecimal value ) {
        StringBuilder sb = new StringBuilder();
        boolean negate = false;
        // <significand-sign> field
        switch (value.signum()) {
            case -1:
                sb.append('-');
                negate = true;
                break;
            case 1:
                sb.append('1');
                break;
            default:
                // Zero has a dedicated, minimal representation
                return "0";
        }

        // <exponent-sign>, <exponent-length> and <exponent> fields.
        // Widen to long BEFORE subtracting: precision and scale are both ints, and their difference can exceed
        // the int range for extreme scales (which would silently wrap and produce the wrong exponent).
        long exponent = (long)value.precision() - value.scale() - 1;
        if (exponent == 0) {
            sb.append('0');
        } else {
            // For negative values the ordering of the exponent is reversed, so flip its sign
            if (negate) exponent = -exponent;
            String exponentField = String.valueOf(Math.abs(exponent));
            int length = exponentField.length();
            char sign = exponent > 0 ? '1' : '-';
            // A negative exponent is written with negated digits so that larger magnitudes sort first
            if (exponent < 0) exponentField = negate(exponentField);
            // <exponent-length>
            String lengthField = String.valueOf(length);
            if (negate) lengthField = negate(lengthField);
            sb.append(sign).append(lengthField).append(' ').append(exponentField);
        }

        // <significand> (always written as a magnitude, without trailing zeros)
        StringBuilder significand = new StringBuilder(value.unscaledValue().abs().toString());
        removeTralingZeros(significand);

        // Append the significand (and the sentinel character for negative values)...
        if (negate) {
            sb.append(negate(significand)).append(NEGATIVE_SENTINEL);
        } else {
            sb.append(significand);
        }
        return sb.toString();
    }

    /**
     * Converts the canonical string representation of a {@link BigDecimal} value into the object form.
     * <p>
     * See {@link #decimalToString(BigDecimal)} for documentation of the canonical form. Since the canonical form does not
     * contain the trailing zeros of the significand, the result is numerically equal to the value that was encoded but may
     * have a smaller scale. The trailing zeros are restored only when that is required to keep the scale within the range of
     * an <code>int</code>.
     * </p>
     *
     * @param value the canonical string representation; may not be null or empty
     * @return the BigDecimal representation; never null
     * @throws NumberFormatException if a numeric field cannot be parsed, or if the scale implied by the string cannot be
     *         represented by a BigDecimal
     * @see #decimalToString(BigDecimal)
     */
    public static BigDecimal stringToDecimal( String value ) {
        assert value != null;
        assert value.length() != 0;
        if ("0".equals(value)) return BigDecimal.ZERO;

        boolean negate = false;
        if (value.charAt(0) == '-') {
            // Negative, so remove the trailing sentinel ...
            assert value.charAt(value.length() - 1) == NEGATIVE_SENTINEL;
            value = value.substring(0, value.length() - 1);
            negate = true;
        }

        // <exponent-sign>, <exponent-length> and <exponent> fields
        long exponent = 0L;
        boolean negateExponent = false;
        switch (value.charAt(1)) {
            case '0':
                // No exponent fields were written; the significand follows immediately
                value = value.substring(2);
                break;
            case '-':
                negateExponent = true;
                // $FALL-THROUGH$
            case '1':
            default:
                // Read in the <exponent-length>
                int indexOfSpace = value.indexOf(' ', 2);
                String lengthField = value.substring(2, indexOfSpace);
                if (negate) lengthField = negate(lengthField);
                int lengthOfExponent = Integer.parseInt(lengthField);
                // Read in the <exponent> (after the space) ...
                int startIndex = indexOfSpace + 1;
                int endIndex = startIndex + lengthOfExponent;
                String exponentField = value.substring(startIndex, endIndex);
                // The digits are negated when the written exponent sign is '-'
                exponent = Long.parseLong(negateExponent ? negate(exponentField) : exponentField);
                // For negative values the written exponent sign is the reverse of the real one
                if (negate) negateExponent = !negateExponent;
                if (negateExponent) exponent = -exponent;
                value = value.substring(endIndex);
        }

        // <significand>
        if (negate) {
            value = negate(value);
        }
        BigInteger significand = new BigInteger(value);

        // Invert "exponent = precision - scale - 1", where the precision is the number of significand digits read.
        // Keep the arithmetic in long, since the result is not guaranteed to fit into an int.
        long scale = value.length() - exponent - 1;
        if (scale < Integer.MIN_VALUE) {
            // The encoder stripped trailing zeros from a value whose scale was at (or near) the minimum, so put back
            // just enough zeros to make the scale representable again.
            long missingZeros = Integer.MIN_VALUE - scale;
            if (missingZeros > Integer.MAX_VALUE) {
                throw new NumberFormatException("Scale " + scale + " is out of range for a BigDecimal");
            }
            significand = significand.multiply(BigInteger.TEN.pow((int)missingZeros));
            scale = Integer.MIN_VALUE;
        } else if (scale > Integer.MAX_VALUE) {
            throw new NumberFormatException("Scale " + scale + " is out of range for a BigDecimal");
        }

        // Now create the result ...
        return new BigDecimal(negate ? significand.negate() : significand, (int)scale);
    }

    /**
     * Compute the "negated" string, which replaces the digits (0 becomes 9, 1 becomes 8, ... and 9 becomes 0).
     *
     * @param value the input string; may not be null
     * @return the negated string; never null
     * @see #negate(StringBuilder)
     */
    protected static String negate( String value ) {
        return negate(new StringBuilder(value)).toString();
    }

    /**
     * Compute the "negated" string, which replaces the digits (0 becomes 9, 1 becomes 8, ... and 9 becomes 0). Space and '-'
     * characters are left untouched. The supplied builder is modified in place.
     *
     * @param value the input string builder, which is modified by this method; may not be null
     * @return the same builder instance that was supplied, now containing the negated string; never null
     * @see #negate(String)
     */
    protected static StringBuilder negate( StringBuilder value ) {
        for (int i = 0, len = value.length(); i != len; ++i) {
            char c = value.charAt(i);
            if (c == ' ' || c == '-') continue;
            value.setCharAt(i, (char)('9' - c + '0'));
        }
        return value;
    }

    /**
     * Utility to remove the trailing 0's. The supplied builder is modified in place; a builder that is empty or that contains
     * nothing but '0' characters ends up empty.
     *
     * @param sb the input string builder; may not be null
     */
    protected static void removeTralingZeros( StringBuilder sb ) {
        int newLength = sb.length();
        // Stop at the start of the buffer so that an all-zero input cannot run past index 0
        while (newLength > 0 && sb.charAt(newLength - 1) == '0') {
            --newLength;
        }
        sb.setLength(newLength);
    }

    /* Prevent instantiation */
    private FieldUtil() {
    }
}