001 /*
002 * JBoss, Home of Professional Open Source.
003 * Copyright 2008, Red Hat Middleware LLC, and individual contributors
004 * as indicated by the @author tags. See the copyright.txt file in the
005 * distribution for a full listing of individual contributors.
006 *
007 * This is free software; you can redistribute it and/or modify it
008 * under the terms of the GNU Lesser General Public License as
009 * published by the Free Software Foundation; either version 2.1 of
010 * the License, or (at your option) any later version.
011 *
012 * This software is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * You should have received a copy of the GNU Lesser General Public
018 * License along with this software; if not, write to the Free
019 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
021 */
022 package org.jboss.dna.common.util;
023
024 /**
025 * <p>
026 * Encodes and decodes to and from Base64 notation.
027 * </p>
028 * <p>
029 * Homepage: <a href="http://iharder.net/base64">http://iharder.net/base64</a>.
030 * </p>
031 * <p>
032 * The <tt>options</tt> parameter, which appears in a few places, is used to pass several pieces of information to the encoder. In
033 * the "higher level" methods such as encodeBytes( bytes, options ) the options parameter can be used to indicate such things as
034 * first gzipping the bytes before encoding them, not inserting linefeeds (though that breaks strict Base64 compatibility), and
035 * encoding using the URL-safe and Ordered dialects.
036 * </p>
037 * <p>
038 * The constants defined in Base64 can be OR-ed together to combine options, so you might make a call like this:
039 * </p>
040 * <code>String encoded = Base64.encodeBytes( mybytes, Base64.GZIP | Base64.DONT_BREAK_LINES );</code>
041 * <p>
042 * to compress the data before encoding it and to produce output that contains no newline characters.
043 * </p>
044 * <p>
045 * Change Log:
046 * </p>
047 * <ul>
048 * <li>v2.2.2 - Fixed encodeFileToFile and decodeFileToFile to use the Base64.InputStream class to encode and decode on the fly
049 * which uses less memory than encoding/decoding an entire file into memory before writing.</li>
050 * <li>v2.2.1 - Fixed bug using URL_SAFE and ORDERED encodings. Fixed bug when using very small files (~< 40 bytes).</li>
051 * <li>v2.2 - Added some helper methods for encoding/decoding directly from one file to the next. Also added a main() method to
052 * support command line encoding/decoding from one file to the next. Also added these Base64 dialects:
053 * <ol>
054 * <li>The default is RFC3548 format.</li>
055 * <li>Calling Base64.setFormat(Base64.BASE64_FORMAT.URLSAFE_FORMAT) generates URL and file name friendly format as described in
056 * Section 4 of RFC3548. http://www.faqs.org/rfcs/rfc3548.html</li>
057 * <li>Calling Base64.setFormat(Base64.BASE64_FORMAT.ORDERED_FORMAT) generates URL and file name friendly format that preserves
058 * lexical ordering as described in http://www.faqs.org/qa/rfcc-1940.html</li>
059 * </ol>
060 * Special thanks to Jim Kellerman at <a href="http://www.powerset.com/">http://www.powerset.com/</a> for contributing the new
061 * Base64 dialects.</li>
062 * <li>v2.1 - Cleaned up javadoc comments and unused variables and methods. Added some convenience methods for reading and writing
063 * to and from files.</li>
064 * <li>v2.0.2 - Now specifies UTF-8 encoding in places where the code fails on systems with other encodings (like EBCDIC).</li>
065 * <li>v2.0.1 - Fixed an error when decoding a single byte, that is, when the encoded data was a single byte.</li>
066 * <li>v2.0 - I got rid of methods that used booleans to set options. Now everything is more consolidated and cleaner. The code
067 * now detects when data that's being decoded is gzip-compressed and will decompress it automatically. Generally things are
068 * cleaner. You'll probably have to change some method calls that you were making to support the new options format (<tt>int</tt>s
069 * that you "OR" together).</li>
070 * <li>v1.5.1 - Fixed bug when decompressing and decoding to a byte[] using <tt>decode( String s, boolean gzipCompressed )</tt>.
071 * Added the ability to "suspend" encoding in the Output Stream so you can turn on and off the encoding if you need to embed
072 * base64 data in an otherwise "normal" stream (like an XML file).</li>
073 * <li>v1.5 - Output stream passes on the flush() command but doesn't do anything itself. This helps when using GZIP streams. Added the
074 * ability to GZip-compress objects before encoding them.</li>
075 * <li>v1.4 - Added helper methods to read/write files.</li>
076 * <li>v1.3.6 - Fixed OutputStream.flush() so that 'position' is reset.</li>
077 * <li>v1.3.5 - Added flag to turn on and off line breaks. Fixed bug in input stream where last buffer being read, if not
078 * completely full, was not returned.</li>
079 * <li>v1.3.4 - Fixed when "improperly padded stream" error was thrown at the wrong time.</li>
080 * <li>v1.3.3 - Fixed I/O streams which were totally messed up.</li>
081 * </ul>
082 * <p>
083 * I am placing this code in the Public Domain. Do with it as you will. This software comes with no guarantees or warranties but
084 * with plenty of well-wishing instead! Please visit <a href="http://iharder.net/base64">http://iharder.net/base64</a>
085 * periodically to check for updates or to contribute improvements.
086 * </p>
087 *
088 * @author Robert Harder
089 * @author rob@iharder.net
090 * @version 2.2.2
091 */
public class Base64 {

    /* ******** P R I V A T E F I E L D S ******** */

    /** The equals sign (=) as a byte; this is the Base64 padding character. */
    private final static byte EQUALS_SIGN = (byte)'=';

    /** Preferred encoding. */
    private final static String PREFERRED_ENCODING = "UTF-8";

    /** Message of every {@link IllegalArgumentException} raised for malformed Base64 input. */
    private final static String INVALID_SOURCE_MESSAGE = "Source bytes are not valid"; //$NON-NLS-1$

    private final static byte WHITE_SPACE_ENC = -5; // Indicates white space in encoding
    private final static byte EQUALS_SIGN_ENC = -1; // Indicates equals sign in encoding

    /* ******** S T A N D A R D B A S E 6 4 A L P H A B E T ******** */

    /** The 64 valid Base64 values. */
    /* Host platform may be something funny like EBCDIC, so we hardcode these values. */
    private final static byte[] _STANDARD_ALPHABET = {(byte)'A', (byte)'B', (byte)'C', (byte)'D', (byte)'E', (byte)'F',
        (byte)'G', (byte)'H', (byte)'I', (byte)'J', (byte)'K', (byte)'L', (byte)'M', (byte)'N', (byte)'O', (byte)'P', (byte)'Q',
        (byte)'R', (byte)'S', (byte)'T', (byte)'U', (byte)'V', (byte)'W', (byte)'X', (byte)'Y', (byte)'Z', (byte)'a', (byte)'b',
        (byte)'c', (byte)'d', (byte)'e', (byte)'f', (byte)'g', (byte)'h', (byte)'i', (byte)'j', (byte)'k', (byte)'l', (byte)'m',
        (byte)'n', (byte)'o', (byte)'p', (byte)'q', (byte)'r', (byte)'s', (byte)'t', (byte)'u', (byte)'v', (byte)'w', (byte)'x',
        (byte)'y', (byte)'z', (byte)'0', (byte)'1', (byte)'2', (byte)'3', (byte)'4', (byte)'5', (byte)'6', (byte)'7', (byte)'8',
        (byte)'9', (byte)'+', (byte)'/'};

    /**
     * Translates a Base64 value to either its 6-bit reconstruction value or a negative number indicating some other meaning
     * (-5 for white space, -1 for the equals sign, -9 for anything that is not valid Base64). The table only has entries for
     * the characters 0 through 126, so callers must range-check a byte before using it as an index.
     **/
    private final static byte[] _STANDARD_DECODABET = {-9, -9, -9, -9, -9, -9, -9, -9, -9, // Decimal 0 - 8
        -5, -5, // Whitespace: Tab and Linefeed
        -9, -9, // Decimal 11 - 12
        -5, // Whitespace: Carriage Return
        -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, // Decimal 14 - 26
        -9, -9, -9, -9, -9, // Decimal 27 - 31
        -5, // Whitespace: Space
        -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, // Decimal 33 - 42
        62, // Plus sign at decimal 43
        -9, -9, -9, // Decimal 44 - 46
        63, // Slash at decimal 47
        52, 53, 54, 55, 56, 57, 58, 59, 60, 61, // Numbers zero through nine
        -9, -9, -9, // Decimal 58 - 60
        -1, // Equals sign at decimal 61
        -9, -9, -9, // Decimal 62 - 64
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, // Letters 'A' through 'N'
        14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // Letters 'O' through 'Z'
        -9, -9, -9, -9, -9, -9, // Decimal 91 - 96
        26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, // Letters 'a' through 'm'
        39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, // Letters 'n' through 'z'
        -9, -9, -9, -9 // Decimal 123 - 126
    };

    /** Defeats instantiation. */
    private Base64() {
    }

    /* ******** E N C O D I N G M E T H O D S ******** */

    /**
     * <p>
     * Encodes up to three bytes of the array <var>source</var> and writes the resulting four Base64 bytes to
     * <var>destination</var>. The source and destination arrays can be manipulated anywhere along their length by specifying
     * <var>srcOffset</var> and <var>destOffset</var>. This method does not check to make sure your arrays are large enough to
     * accommodate <var>srcOffset</var> + 3 for the <var>source</var> array or <var>destOffset</var> + 4 for the
     * <var>destination</var> array. The actual number of significant bytes in your array is given by <var>numSigBytes</var>.
     * </p>
     * <p>
     * When <var>numSigBytes</var> is 1 or 2 the missing input is treated as zero bits and the output group is completed with
     * one or two equals signs. For any value other than 1, 2 or 3 nothing is written.
     * </p>
     *
     * @param source the array to convert
     * @param srcOffset the index where conversion begins
     * @param numSigBytes the number of significant bytes in your array
     * @param destination the array to hold the conversion
     * @param destOffset the index where output will be put
     * @return the <var>destination</var> array
     * @since 1.3
     */
    private static byte[] encode3to4( byte[] source,
                                      int srcOffset,
                                      int numSigBytes,
                                      byte[] destination,
                                      int destOffset ) {
        byte[] ALPHABET = _STANDARD_ALPHABET;

        // Pack the (up to) three input bytes into the low 24 bits of an int:
        //
        //   bits 23..16 = first byte, bits 15..8 = second byte, bits 7..0 = third byte
        //
        // Masking with 0xFF strips the sign extension that Java applies when a negative byte is widened to an int.
        // The four 6-bit groups are then read with >>>18, >>>12, >>>6 and >>>0.
        int inBuff = (numSigBytes > 0 ? ((source[srcOffset] & 0xFF) << 16) : 0)
                     | (numSigBytes > 1 ? ((source[srcOffset + 1] & 0xFF) << 8) : 0)
                     | (numSigBytes > 2 ? (source[srcOffset + 2] & 0xFF) : 0);

        switch (numSigBytes) {
            case 3:
                destination[destOffset] = ALPHABET[(inBuff >>> 18)];
                destination[destOffset + 1] = ALPHABET[(inBuff >>> 12) & 0x3f];
                destination[destOffset + 2] = ALPHABET[(inBuff >>> 6) & 0x3f];
                destination[destOffset + 3] = ALPHABET[(inBuff) & 0x3f];
                return destination;

            case 2:
                destination[destOffset] = ALPHABET[(inBuff >>> 18)];
                destination[destOffset + 1] = ALPHABET[(inBuff >>> 12) & 0x3f];
                destination[destOffset + 2] = ALPHABET[(inBuff >>> 6) & 0x3f];
                destination[destOffset + 3] = EQUALS_SIGN;
                return destination;

            case 1:
                destination[destOffset] = ALPHABET[(inBuff >>> 18)];
                destination[destOffset + 1] = ALPHABET[(inBuff >>> 12) & 0x3f];
                destination[destOffset + 2] = EQUALS_SIGN;
                destination[destOffset + 3] = EQUALS_SIGN;
                return destination;

            default:
                return destination;
        } // end switch
    } // end encode3to4

    /**
     * Encodes a byte array into Base64 notation using the standard alphabet. The output is padded with equals signs to a
     * multiple of four characters and never contains line breaks. Does not GZip-compress data.
     *
     * @param source The data to convert; may be empty, in which case an empty string is returned
     * @return the encoded data
     * @throws NullPointerException if <var>source</var> is <code>null</code>
     * @since 1.4
     */
    public static String encodeBytes( byte[] source ) {
        int len = source.length;
        // Every started group of three input bytes yields exactly four output bytes. Dividing before multiplying keeps the
        // size computation from overflowing for large arrays.
        byte[] outBuff = new byte[(len + 2) / 3 * 4];

        int d = 0; // read position in source
        int e = 0; // write position in outBuff
        int len2 = len - 2;
        for (; d < len2; d += 3, e += 4) {
            encode3to4(source, d, 3, outBuff, e);
        } // end for: each complete group of three bytes

        if (d < len) {
            // One or two bytes are left over; encode3to4 adds the padding.
            encode3to4(source, d, len - d, outBuff, e);
            e += 4;
        } // end if: some padding needed

        // Return value according to relevant encoding.
        try {
            return new String(outBuff, 0, e, PREFERRED_ENCODING);
        } // end try
        catch (java.io.UnsupportedEncodingException uue) {
            // Not expected (every JVM must support UTF-8); the output is pure ASCII, so the platform default is a fallback.
            return new String(outBuff, 0, e);
        } // end catch
    } // end encodeBytes

    /* ******** D E C O D I N G M E T H O D S ******** */

    /**
     * Decodes four bytes from array <var>source</var> and writes the resulting bytes (up to three of them) to
     * <var>destination</var>. The source and destination arrays can be manipulated anywhere along their length by specifying
     * <var>srcOffset</var> and <var>destOffset</var>. This method does not check to make sure your arrays are large enough to
     * accommodate <var>srcOffset</var> + 4 for the <var>source</var> array or <var>destOffset</var> + 3 for the
     * <var>destination</var> array. This method returns the actual number of bytes that were converted from the Base64 encoding.
     * <p>
     * The four source bytes must already be known to be Base64 alphabet characters or equals signs; this method only checks
     * that the equals signs appear where padding is allowed (<code>xx==</code> or <code>xxx=</code>).
     * </p>
     *
     * @param source the array to convert
     * @param srcOffset the index where conversion begins
     * @param destination the array to hold the conversion
     * @param destOffset destination offset
     * @return the number of decoded bytes converted
     * @throws IllegalArgumentException if an equals sign is in the first or second position, or if the third position is an
     *         equals sign but the fourth is not
     * @since 1.3
     */
    private static int decode4to3( byte[] source,
                                   int srcOffset,
                                   byte[] destination,
                                   int destOffset ) {
        byte[] DECODABET = _STANDARD_DECODABET;

        byte c0 = source[srcOffset];
        byte c1 = source[srcOffset + 1];
        byte c2 = source[srcOffset + 2];
        byte c3 = source[srcOffset + 3];

        // Padding can never start a group: the equals sign maps to -1 in the table, which must not be used as data bits.
        if (c0 == EQUALS_SIGN || c1 == EQUALS_SIGN) {
            throw new IllegalArgumentException(INVALID_SOURCE_MESSAGE);
        }

        // Example: Dk==
        if (c2 == EQUALS_SIGN) {
            if (c3 != EQUALS_SIGN) {
                // Data after padding inside one group ("Dk=E") is malformed.
                throw new IllegalArgumentException(INVALID_SOURCE_MESSAGE);
            }
            int outBuff = ((DECODABET[c0] & 0xFF) << 18) | ((DECODABET[c1] & 0xFF) << 12);

            destination[destOffset] = (byte)(outBuff >>> 16);
            return 1;
        }

        // Example: DkL=
        if (c3 == EQUALS_SIGN) {
            int outBuff = ((DECODABET[c0] & 0xFF) << 18) | ((DECODABET[c1] & 0xFF) << 12) | ((DECODABET[c2] & 0xFF) << 6);

            destination[destOffset] = (byte)(outBuff >>> 16);
            destination[destOffset + 1] = (byte)(outBuff >>> 8);
            return 2;
        }

        // Example: DkLE
        int outBuff = ((DECODABET[c0] & 0xFF) << 18) | ((DECODABET[c1] & 0xFF) << 12) | ((DECODABET[c2] & 0xFF) << 6)
                      | ((DECODABET[c3] & 0xFF));

        destination[destOffset] = (byte)(outBuff >> 16);
        destination[destOffset + 1] = (byte)(outBuff >> 8);
        destination[destOffset + 2] = (byte)(outBuff);
        return 3;
    } // end decode4to3

    /**
     * Decodes data from Base64 notation (standard alphabet).
     * <p>
     * Tab, line feed, carriage return and space characters are skipped, but they still count towards the up-front length
     * check: the UTF-8 length of the whole string must be a multiple of four. Decoding stops after the first group that ends
     * with an equals sign; any characters after that group are ignored.
     * </p>
     *
     * @param s the string to decode
     * @return the decoded data
     * @throws IllegalArgumentException if the length of <var>s</var> is not a multiple of four, if it contains a character
     *         that is neither Base64, white space nor an equals sign, if an equals sign is misplaced within a group, or if
     *         the input ends in the middle of a group
     * @throws NullPointerException if <var>s</var> is <code>null</code>
     * @since 1.4
     */
    public static byte[] decode( String s ) {
        byte[] source;
        try {
            source = s.getBytes(PREFERRED_ENCODING);
        } // end try
        catch (java.io.UnsupportedEncodingException uee) {
            // Not expected: every JVM must support UTF-8.
            source = s.getBytes();
        } // end catch

        if (source.length % 4 != 0) {
            throw new IllegalArgumentException(INVALID_SOURCE_MESSAGE);
        }
        byte[] DECODABET = _STANDARD_DECODABET;
        int len = source.length;
        // Upper limit on size of output. len is a multiple of four here, so dividing first is exact and cannot overflow.
        byte[] outBuff = new byte[len / 4 * 3];
        int outBuffPosn = 0;

        byte[] b4 = new byte[4]; // the group of four significant characters currently being collected
        int b4Posn = 0;
        for (int i = 0; i < len; i++) {
            byte sbi = source[i];

            // Negative bytes belong to non-ASCII characters, and 127 has no table entry. Both are invalid; masking them
            // down to seven bits would silently turn them into unrelated alphabet characters.
            if (sbi < 0 || sbi >= DECODABET.length) {
                throw new IllegalArgumentException(INVALID_SOURCE_MESSAGE);
            }
            byte sbiDecode = DECODABET[sbi];

            if (sbiDecode < WHITE_SPACE_ENC) {
                // Neither white space, an equals sign nor an alphabet character.
                throw new IllegalArgumentException(INVALID_SOURCE_MESSAGE);
            }
            if (sbiDecode < EQUALS_SIGN_ENC) {
                continue; // white space: skip
            }

            b4[b4Posn++] = sbi;
            if (b4Posn > 3) {
                outBuffPosn += decode4to3(b4, 0, outBuff, outBuffPosn);
                b4Posn = 0;

                // If that was the equals sign, the data is complete: break out of 'for' loop
                if (sbi == EQUALS_SIGN) break;
            } // end if: quartet built
        } // each input character

        if (b4Posn != 0) {
            // White space made the length check pass although the last group is incomplete; refuse to drop data silently.
            throw new IllegalArgumentException(INVALID_SOURCE_MESSAGE);
        }

        byte[] out = new byte[outBuffPosn];
        System.arraycopy(outBuff, 0, out, 0, outBuffPosn);
        return out;
    } // end decode
}