1 /*
2 * ModeShape (http://www.modeshape.org)
3 * See the COPYRIGHT.txt file distributed with this work for information
4 * regarding copyright ownership. Some portions may be licensed
5 * to Red Hat, Inc. under one or more contributor license agreements.
6 * See the AUTHORS.txt file in the distribution for a full listing of
7 * individual contributors.
8 *
9 * ModeShape is free software. Unless otherwise indicated, all code in ModeShape
10 * is licensed to you under the terms of the GNU Lesser General Public License as
11 * published by the Free Software Foundation; either version 2.1 of
12 * the License, or (at your option) any later version.
13 *
14 * ModeShape is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this software; if not, write to the Free
21 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
22 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
23 */
24 package org.modeshape.jdbc.util;
25
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CodingErrorAction;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.BitSet;

import org.modeshape.jdbc.util.TextDecoder;
import org.modeshape.jdbc.util.TextEncoder;
import org.modeshape.jdbc.util.UrlEncoder;
33
34 /**
35 * An encoder useful for converting text to be used within a URL, as defined by Section 2.3 of <a
36 * href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>. Note that this class does not encode a complete URL (
37 * {@link java.net.URLEncoder} and {@link java.net.URLDecoder} should be used for such purposes).
38 */
39 public class UrlEncoder implements TextEncoder, TextDecoder {
40
41 /**
42 * Data characters that are allowed in a URI but do not have a reserved purpose are called unreserved. These include upper and
43 * lower case letters, decimal digits, and a limited set of punctuation marks and symbols.
44 *
45 * <pre>
46 * unreserved = alphanum | mark
47 * mark = "-" | "_" | "." | "!" | "˜" | "*" | "'" | "(" | ")"
48 * </pre>
49 *
50 * Unreserved characters can be escaped without changing the semantics of the URI, but this should not be done unless the URI
51 * is being used in a context that does not allow the unescaped character to appear.
52 */
53 private static final BitSet RFC2396_UNRESERVED_CHARACTERS = new BitSet(256);
54 private static final BitSet RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS;
55
56 public static final char ESCAPE_CHARACTER = '%';
57
58 static {
59 RFC2396_UNRESERVED_CHARACTERS.set('a', 'z' + 1);
60 RFC2396_UNRESERVED_CHARACTERS.set('A', 'Z' + 1);
61 RFC2396_UNRESERVED_CHARACTERS.set('0', '9' + 1);
62 RFC2396_UNRESERVED_CHARACTERS.set('-');
63 RFC2396_UNRESERVED_CHARACTERS.set('_');
64 RFC2396_UNRESERVED_CHARACTERS.set('.');
65 RFC2396_UNRESERVED_CHARACTERS.set('!');
66 RFC2396_UNRESERVED_CHARACTERS.set('~');
67 RFC2396_UNRESERVED_CHARACTERS.set('*');
68 RFC2396_UNRESERVED_CHARACTERS.set('\'');
69 RFC2396_UNRESERVED_CHARACTERS.set('(');
70 RFC2396_UNRESERVED_CHARACTERS.set(')');
71
72 RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS = (BitSet)RFC2396_UNRESERVED_CHARACTERS.clone();
73 RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS.set('/');
74 }
75
76 private boolean slashEncoded = true;
77
78 /**
79 * {@inheritDoc}
80 */
81 public String encode( String text ) {
82 if (text == null) return null;
83 if (text.length() == 0) return text;
84 return encode(text, isSlashEncoded() ? RFC2396_UNRESERVED_CHARACTERS : RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS);
85 }
86
87 protected String encode( String text,
88 BitSet safeChars ) {
89 final StringBuilder result = new StringBuilder();
90 final CharacterIterator iter = new StringCharacterIterator(text);
91 for (char c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
92 if (safeChars.get(c)) {
93 // Safe character, so just pass through ...
94 result.append(c);
95 } else {
96 // The character is not a safe character, and must be escaped ...
97 result.append(ESCAPE_CHARACTER);
98 result.append(Character.toLowerCase(Character.forDigit(c / 16, 16)));
99 result.append(Character.toLowerCase(Character.forDigit(c % 16, 16)));
100 }
101 }
102 return result.toString();
103 }
104
105 /**
106 * {@inheritDoc}
107 */
108 public String decode( String encodedText ) {
109 if (encodedText == null) return null;
110 if (encodedText.length() == 0) return encodedText;
111 final StringBuilder result = new StringBuilder();
112 final CharacterIterator iter = new StringCharacterIterator(encodedText);
113 for (char c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
114 if (c == ESCAPE_CHARACTER) {
115 boolean foundEscapedCharacter = false;
116 // Found the first character in a potential escape sequence, so grab the next two characters ...
117 char hexChar1 = iter.next();
118 char hexChar2 = hexChar1 != CharacterIterator.DONE ? iter.next() : CharacterIterator.DONE;
119 if (hexChar2 != CharacterIterator.DONE) {
120 // We found two more characters, but ensure they form a valid hexadecimal number ...
121 int hexNum1 = Character.digit(hexChar1, 16);
122 int hexNum2 = Character.digit(hexChar2, 16);
123 if (hexNum1 > -1 && hexNum2 > -1) {
124 foundEscapedCharacter = true;
125 result.append((char)(hexNum1 * 16 + hexNum2));
126 }
127 }
128 if (!foundEscapedCharacter) {
129 result.append(c);
130 if (hexChar1 != CharacterIterator.DONE) result.append(hexChar1);
131 if (hexChar2 != CharacterIterator.DONE) result.append(hexChar2);
132 }
133 } else {
134 result.append(c);
135 }
136 }
137 return result.toString();
138 }
139
140 /**
141 * @return slashEncoded
142 */
143 public boolean isSlashEncoded() {
144 return this.slashEncoded;
145 }
146
147 /**
148 * @param slashEncoded Sets slashEncoded to the specified value.
149 * @return this object, for method chaining
150 */
151 public UrlEncoder setSlashEncoded( boolean slashEncoded ) {
152 this.slashEncoded = slashEncoded;
153 return this;
154 }
155
156 }