/**
 * Utility for working with Lucene field values.
 */
public class FieldUtil {

    /**
     * Creates a canonical string representation of the supplied {@link BigDecimal} value, whereby all string representations are
     * lexicographically sortable. This makes it possible to store the wide range of values that can be represented by BigDecimal,
     * while still enabling sorting and range queries.
     * <p>
     * This canonical form represents all decimal values using a prescribed format, which is based upon <a
     * href="http://www.mail-archive.com/java-user@lucene.apache.org/msg23632.html">Steven Rowe's suggestion</a> but with
     * modifications to handle variable-length exponents (per his suggestion in the last sentence), use spaces between fields
     * only where required (for minimal length), and utilize an optimized (e.g., shorter) form when the value is '0' or the
     * exponent is '0'. Thus, this format contains only digits (e.g., '0'..'9'), the '-' character, the space character, and
     * (for negative values) the trailing 'A' sentinel.
     *
     * <pre>
     *    &lt;significand-sign&gt;&lt;exponent-sign&gt;&lt;exponent-length&gt; &lt;exponent&gt;&lt;significand&gt;
     * </pre>
     *
     * where:
     * <ul>
     * <li>the <b>significand</b> is the part of the number containing the significant figures, and is a (big) integer value
     * obtained from the BigDecimal using {@link BigDecimal#unscaledValue()} (with trailing zeros removed);</li>
     * <li>the <b>exponent</b> is the integer used to define the number of factors of 10 that are applied to the significand,
     * obtained by computing <code>value.precision() - value.scale() - 1</code>;</li>
     * </ul>
     * Thus the fields are defined as:
     * <ul>
     * <li>the <code>&lt;significand-sign&gt;</code> is '-' if the significand is negative, '0' if equal to zero, or '1' if
     * positive;</li>
     * <li>the <code>&lt;exponent-sign&gt;</code> is '-' if the exponent is negative, '0' if equal to zero, or '1' if positive;
     * if '0', then the <code>&lt;exponent-length&gt;</code> and <code>&lt;exponent&gt;</code> fields are not written;</li>
     * <li>the <code>&lt;exponent-length&gt;</code> is the positive value representing the length of the
     * <code>&lt;exponent&gt;</code>, and is not included when the <code>&lt;exponent-sign&gt;</code> is '0';</li>
     * <li>the <code>&lt;exponent&gt;</code> is the integer used to define the number of factors of 10 that are applied to the
     * significand, obtained by computing <code>value.precision() - value.scale() - 1</code>;</li>
     * <li>the <code>&lt;significand&gt;</code> is the part of the number containing the significant figures, and is a (big)
     * integer value obtained from the BigDecimal using {@link BigDecimal#unscaledValue()};</li>
     * </ul>
     * In the case of a negative significand, the <code>&lt;significand&gt;</code> field is negated such that each digit is
     * replaced with <code>(base - digit - 1)</code> and appended by 'A' (which is greater than all other digits) to ensure that
     * significands with greater precision are ordered before those that share significand prefixes but have lesser precision.
     * </p>
     * <p>
     * Thus, the format for a negative BigDecimal value becomes:
     *
     * <pre>
     *    -&lt;reversed-exponent-sign&gt;&lt;negated-exponent-length&gt; &lt;negated-exponent&gt;&lt;significand&gt;&lt;sentinel&gt;
     * </pre>
     *
     * where the <code>&lt;sentinel&gt;</code> is always 'A'. Note that the exponent length field is also negated.
     * </p>
     * <h3>Examples</h3>
     * <p>
     * Here are several examples that show BigDecimal values and their corresponding canonical string representation:
     *
     * <pre>
     *    +5.E-3      =&gt; 1-1 65
     *    +1.E-2      =&gt; 1-1 71
     *    +1.0E-2     =&gt; 1-1 71
     *    +1.0000E-2  =&gt; 1-1 71
     *    +1.1E-2     =&gt; 1-1 711
     *    +1.11E-2    =&gt; 1-1 7111
     *    +1.2E-2     =&gt; 1-1 712
     *    +5.E-2      =&gt; 1-1 75
     *    +7.3E+2     =&gt; 111 273
     *    +7.4E+2     =&gt; 111 274
     *    +7.45E+2    =&gt; 111 2745
     *    +8.7654E+3  =&gt; 111 387654
     * </pre>
     *
     * Here is how a BigDecimal value of {@link BigDecimal#ZERO zero} is represented:
     *
     * <pre>
     *     0.0E0      =&gt; 0
     * </pre>
     *
     * BigDecimal values with an exponent of '0' are represented as follows:
     *
     * <pre>
     *    +1.2E0      =&gt; 1012
     *    -1.2E0      =&gt; -087A
     * </pre>
     *
     * And here are some negative value examples:
     *
     * <pre>
     *    -8.7654E+3  =&gt; --8 612345A
     *    -7.45E+2    =&gt; --8 7254A
     *    -7.4E+2     =&gt; --8 725A
     *    -7.3E+2     =&gt; --8 726A
     *    -5.E-2      =&gt; -18 24A
     *    -1.2E-2     =&gt; -18 287A
     *    -1.11E-2    =&gt; -18 2888A
     *    -1.1E-2     =&gt; -18 288A
     *    -1.0000E-2  =&gt; -18 28A
     *    -1.0E-2     =&gt; -18 28A
     *    -1.E-2      =&gt; -18 28A
     *    -5.E-3      =&gt; -18 34A
     *    -5.E-4      =&gt; -18 44A
     * </pre>
     *
     * </p>
     * <p>
     * Every {@link BigDecimal} value can be encoded and decoded again; the decoded value is numerically equal to the original
     * (trailing zeros of the significand are not preserved, so the scale may differ).
     * </p>
     *
     * @param value the value to be converted into its canonical form; may not be null
     * @return the canonical string representation; never null or empty
     * @see #stringToDecimal(String)
     */
    public static String decimalToString( BigDecimal value ) {
        final int signum = value.signum();
        if (signum == 0) {
            // Optimized form for zero, regardless of its scale
            return "0";
        }
        final boolean negative = signum < 0;

        StringBuilder sb = new StringBuilder();
        // <significand-sign> field
        sb.append(negative ? '-' : '1');

        // <exponent-sign>, <exponent-length> and <exponent> fields.
        // Widen to long BEFORE subtracting: precision() and scale() are both ints, and a scale near
        // Integer.MIN_VALUE would otherwise overflow int arithmetic and flip the sign of the exponent.
        long exponent = (long)value.precision() - value.scale() - 1L;
        if (exponent == 0L) {
            sb.append('0');
        } else {
            // For negative values the ordering is reversed, so the exponent's sign is reversed too
            if (negative) {
                exponent = -exponent;
            }
            String exponentField = String.valueOf(Math.abs(exponent));
            final int length = exponentField.length();
            final char sign = exponent > 0 ? '1' : '-';
            if (exponent < 0) {
                // Negative exponents are written in digit-complemented form
                exponentField = negate(exponentField);
            }
            // <exponent-length>
            // NOTE(review): the length field is complemented based on the sign of the VALUE, not the sign of the
            // (adjusted) exponent. As a result, encodings whose negative exponents have different digit counts do not
            // sort correctly (e.g. 5E-5 => "1-1 45" sorts before 5E-15 => "1-2 845"). Left unchanged here because
            // fixing it would alter the canonical format of already-encoded values.
            String lengthField = String.valueOf(length);
            if (negative) {
                lengthField = negate(lengthField);
            }
            sb.append(sign).append(lengthField).append(' ').append(exponentField);
        }

        // <significand>: the digits of the absolute unscaled value, without trailing zeros
        StringBuilder significand = new StringBuilder(value.unscaledValue().abs().toString());
        removeTralingZeros(significand);

        // Append the significand (and, for negative values, the sentinel character)...
        if (negative) {
            sb.append(negate(significand)).append('A');
        } else {
            sb.append(significand);
        }
        return sb.toString();
    }

    /**
     * Converts the canonical string representation of a {@link BigDecimal} value into the object form.
     * <p>
     * See {@link #decimalToString(BigDecimal)} for documentation of the canonical form.
     * </p>
     *
     * @param value the canonical string representation; may not be null or empty
     * @return the BigDecimal representation; never null
     * @see #decimalToString(BigDecimal)
     */
    public static BigDecimal stringToDecimal( String value ) {
        assert value != null;
        assert value.length() != 0;
        if ("0".equals(value)) {
            return BigDecimal.ZERO;
        }

        final boolean negative = value.charAt(0) == '-';
        if (negative) {
            // Negative, so remove the trailing sentinel ...
            assert value.charAt(value.length() - 1) == 'A';
            value = value.substring(0, value.length() - 1);
        }

        // <exponent-sign>, <exponent-length> and <exponent> fields
        long exponent = 0L;
        final char exponentSign = value.charAt(1);
        if (exponentSign == '0') {
            // Exponent of zero: no length or exponent fields were written
            value = value.substring(2);
        } else {
            // Anything other than '-' is handled as a positive exponent sign
            boolean negateExponent = exponentSign == '-';

            // Read in the <exponent-length> (complemented for negative values)
            int indexOfSpace = value.indexOf(' ', 2);
            String lengthField = value.substring(2, indexOfSpace);
            if (negative) {
                lengthField = negate(lengthField);
            }
            int lengthOfExponent = Integer.parseInt(lengthField);

            // Read in the <exponent> (after the space); it is complemented when its sign field is '-' ...
            int startIndex = indexOfSpace + 1;
            int endIndex = startIndex + lengthOfExponent;
            String exponentField = value.substring(startIndex, endIndex);
            exponent = Long.parseLong(negateExponent ? negate(exponentField) : exponentField);

            // The exponent sign was reversed when a negative value was encoded, so reverse it back
            if (negative) {
                negateExponent = !negateExponent;
            }
            if (negateExponent) {
                exponent = -exponent;
            }
            value = value.substring(endIndex);
        }

        // <significand> (digit-complemented for negative values)
        if (negative) {
            value = negate(value);
        }
        BigInteger significand = new BigInteger(value);
        // Inverse of: exponent = precision - scale - 1
        int scale = (int)(value.length() - exponent - 1);

        // Now create the result ...
        return new BigDecimal(negative ? significand.negate() : significand, scale);
    }

    /**
     * Compute the "negated" string, which replaces the digits (0 becomes 9, 1 becomes 8, ... and 9 becomes 0). Space and '-'
     * characters are left as is.
     *
     * @param value the input string; may not be null
     * @return the negated string; never null
     * @see #negate(StringBuilder)
     */
    protected static String negate( String value ) {
        return negate(new StringBuilder(value)).toString();
    }

    /**
     * Compute the "negated" string, which replaces the digits (0 becomes 9, 1 becomes 8, ... and 9 becomes 0). Space and '-'
     * characters are left as is. The supplied builder is modified in place.
     *
     * @param value the input string builder; may not be null
     * @return the same builder instance that was supplied, now containing the negated string; never null
     * @see #negate(String)
     */
    protected static StringBuilder negate( StringBuilder value ) {
        for (int i = 0, len = value.length(); i != len; ++i) {
            char c = value.charAt(i);
            if (c == ' ' || c == '-') continue;
            // Nine's complement of the digit: '0' <-> '9', '1' <-> '8', ...
            value.setCharAt(i, (char)('9' - c + '0'));
        }
        return value;
    }

    /**
     * Utility to remove the trailing 0's. The builder is modified in place; if it contains nothing but '0' characters, it is
     * left empty.
     *
     * @param sb the input string builder; may not be null
     */
    protected static void removeTralingZeros( StringBuilder sb ) {
        int end = sb.length();
        // Bounded scan, so that an all-zero (or empty) buffer never indexes before the first character
        while (end > 0 && sb.charAt(end - 1) == '0') {
            --end;
        }
        sb.setLength(end);
    }

    /* Prevent instantiation */
    private FieldUtil() {
    }
}