001 /*
002 * JBoss DNA (http://www.jboss.org/dna)
003 * See the COPYRIGHT.txt file distributed with this work for information
004 * regarding copyright ownership. Some portions may be licensed
005 * to Red Hat, Inc. under one or more contributor license agreements.
006 * See the AUTHORS.txt file in the distribution for a full listing of
007 * individual contributors.
008 *
009 * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
010 * is licensed to you under the terms of the GNU Lesser General Public License as
011 * published by the Free Software Foundation; either version 2.1 of
012 * the License, or (at your option) any later version.
013 *
014 * JBoss DNA is distributed in the hope that it will be useful,
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
017 * Lesser General Public License for more details.
018 *
019 * You should have received a copy of the GNU Lesser General Public
020 * License along with this software; if not, write to the Free
021 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
022 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
023 */
024 package org.jboss.dna.common.text;
025
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CodingErrorAction;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.BitSet;
029
030 /**
031 * An encoder useful for converting text to be used within a URL, as defined by Section 2.3 of <a
032 * href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>. Note that this class does not encode a complete URL ({@link java.net.URLEncoder}
033 * and {@link java.net.URLDecoder} should be used for such purposes).
034 *
035 * @author Randall Hauch
036 */
037 public class UrlEncoder implements TextEncoder, TextDecoder {
038
039 /**
040 * Data characters that are allowed in a URI but do not have a reserved purpose are called unreserved. These include upper and
041 * lower case letters, decimal digits, and a limited set of punctuation marks and symbols.
042 *
043 * <pre>
044 * unreserved = alphanum | mark
045 * mark = "-" | "_" | "." | "!" | "˜" | "*" | "'" | "(" | ")"
046 * </pre>
047 *
048 * Unreserved characters can be escaped without changing the semantics of the URI, but this should not be done unless the URI
049 * is being used in a context that does not allow the unescaped character to appear.
050 */
051 private static final BitSet RFC2396_UNRESERVED_CHARACTERS = new BitSet(256);
052 private static final BitSet RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS;
053
054 public static final char ESCAPE_CHARACTER = '%';
055
056 static {
057 RFC2396_UNRESERVED_CHARACTERS.set('a', 'z' + 1);
058 RFC2396_UNRESERVED_CHARACTERS.set('A', 'Z' + 1);
059 RFC2396_UNRESERVED_CHARACTERS.set('0', '9' + 1);
060 RFC2396_UNRESERVED_CHARACTERS.set('-');
061 RFC2396_UNRESERVED_CHARACTERS.set('_');
062 RFC2396_UNRESERVED_CHARACTERS.set('.');
063 RFC2396_UNRESERVED_CHARACTERS.set('!');
064 RFC2396_UNRESERVED_CHARACTERS.set('~');
065 RFC2396_UNRESERVED_CHARACTERS.set('*');
066 RFC2396_UNRESERVED_CHARACTERS.set('\'');
067 RFC2396_UNRESERVED_CHARACTERS.set('(');
068 RFC2396_UNRESERVED_CHARACTERS.set(')');
069
070 RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS = (BitSet)RFC2396_UNRESERVED_CHARACTERS.clone();
071 RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS.set('/');
072 }
073
074 private boolean slashEncoded = true;
075
076 /**
077 * {@inheritDoc}
078 */
079 public String encode( String text ) {
080 if (text == null) return null;
081 if (text.length() == 0) return text;
082 final BitSet safeChars = isSlashEncoded() ? RFC2396_UNRESERVED_CHARACTERS : RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS;
083 final StringBuilder result = new StringBuilder();
084 final CharacterIterator iter = new StringCharacterIterator(text);
085 for (char c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
086 if (safeChars.get(c)) {
087 // Safe character, so just pass through ...
088 result.append(c);
089 } else {
090 // The character is not a safe character, and must be escaped ...
091 result.append(ESCAPE_CHARACTER);
092 result.append(Character.toLowerCase(Character.forDigit(c / 16, 16)));
093 result.append(Character.toLowerCase(Character.forDigit(c % 16, 16)));
094 }
095 }
096 return result.toString();
097 }
098
099 /**
100 * {@inheritDoc}
101 */
102 public String decode( String encodedText ) {
103 if (encodedText == null) return null;
104 if (encodedText.length() == 0) return encodedText;
105 final StringBuilder result = new StringBuilder();
106 final CharacterIterator iter = new StringCharacterIterator(encodedText);
107 for (char c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
108 if (c == ESCAPE_CHARACTER) {
109 boolean foundEscapedCharacter = false;
110 // Found the first character in a potential escape sequence, so grab the next two characters ...
111 char hexChar1 = iter.next();
112 char hexChar2 = hexChar1 != CharacterIterator.DONE ? iter.next() : CharacterIterator.DONE;
113 if (hexChar2 != CharacterIterator.DONE) {
114 // We found two more characters, but ensure they form a valid hexadecimal number ...
115 int hexNum1 = Character.digit(hexChar1, 16);
116 int hexNum2 = Character.digit(hexChar2, 16);
117 if (hexNum1 > -1 && hexNum2 > -1) {
118 foundEscapedCharacter = true;
119 result.append((char)(hexNum1 * 16 + hexNum2));
120 }
121 }
122 if (!foundEscapedCharacter) {
123 result.append(c);
124 if (hexChar1 != CharacterIterator.DONE) result.append(hexChar1);
125 if (hexChar2 != CharacterIterator.DONE) result.append(hexChar2);
126 }
127 } else {
128 result.append(c);
129 }
130 }
131 return result.toString();
132 }
133
134 /**
135 * @return slashEncoded
136 */
137 public boolean isSlashEncoded() {
138 return this.slashEncoded;
139 }
140
141 /**
142 * @param slashEncoded Sets slashEncoded to the specified value.
143 * @return this object, for method chaining
144 */
145 public UrlEncoder setSlashEncoded( boolean slashEncoded ) {
146 this.slashEncoded = slashEncoded;
147 return this;
148 }
149
150 }