001 /*
002 * JBoss, Home of Professional Open Source.
003 * Copyright 2008, Red Hat Middleware LLC, and individual contributors
004 * as indicated by the @author tags. See the copyright.txt file in the
005 * distribution for a full listing of individual contributors.
006 *
007 * This is free software; you can redistribute it and/or modify it
008 * under the terms of the GNU Lesser General Public License as
009 * published by the Free Software Foundation; either version 2.1 of
010 * the License, or (at your option) any later version.
011 *
012 * This software is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * You should have received a copy of the GNU Lesser General Public
018 * License along with this software; if not, write to the Free
019 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
021 */
022 package org.jboss.dna.common.text;
023
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.BitSet;
027
028 /**
029 * An encoder useful for converting text to be used within a URL, as defined by Section 2.3 of <a
030 * href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>. Note that this class does not encode a complete URL ({@link java.net.URLEncoder}
031 * and {@link java.net.URLDecoder} should be used for such purposes).
032 *
033 * @author Randall Hauch
034 */
035 public class UrlEncoder implements TextEncoder, TextDecoder {
036
037 /**
038 * Data characters that are allowed in a URI but do not have a reserved purpose are called unreserved. These include upper and
039 * lower case letters, decimal digits, and a limited set of punctuation marks and symbols.
040 *
041 * <pre>
042 * unreserved = alphanum | mark
043 * mark = "-" | "_" | "." | "!" | "˜" | "*" | "'" | "(" | ")"
044 * </pre>
045 *
046 * Unreserved characters can be escaped without changing the semantics of the URI, but this should not be done unless the URI
047 * is being used in a context that does not allow the unescaped character to appear.
048 */
049 private static final BitSet RFC2396_UNRESERVED_CHARACTERS = new BitSet(256);
050 private static final BitSet RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS;
051
052 public static final char ESCAPE_CHARACTER = '%';
053
054 static {
055 RFC2396_UNRESERVED_CHARACTERS.set('a', 'z' + 1);
056 RFC2396_UNRESERVED_CHARACTERS.set('A', 'Z' + 1);
057 RFC2396_UNRESERVED_CHARACTERS.set('0', '9' + 1);
058 RFC2396_UNRESERVED_CHARACTERS.set('-');
059 RFC2396_UNRESERVED_CHARACTERS.set('_');
060 RFC2396_UNRESERVED_CHARACTERS.set('.');
061 RFC2396_UNRESERVED_CHARACTERS.set('!');
062 RFC2396_UNRESERVED_CHARACTERS.set('~');
063 RFC2396_UNRESERVED_CHARACTERS.set('*');
064 RFC2396_UNRESERVED_CHARACTERS.set('\'');
065 RFC2396_UNRESERVED_CHARACTERS.set('(');
066 RFC2396_UNRESERVED_CHARACTERS.set(')');
067
068 RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS = (BitSet)RFC2396_UNRESERVED_CHARACTERS.clone();
069 RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS.set('/');
070 }
071
072 private boolean slashEncoded = true;
073
074 /**
075 * {@inheritDoc}
076 */
077 public String encode( String text ) {
078 if (text == null) return null;
079 if (text.length() == 0) return text;
080 final BitSet safeChars = isSlashEncoded() ? RFC2396_UNRESERVED_CHARACTERS : RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS;
081 final StringBuilder result = new StringBuilder();
082 final CharacterIterator iter = new StringCharacterIterator(text);
083 for (char c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
084 if (safeChars.get(c)) {
085 // Safe character, so just pass through ...
086 result.append(c);
087 } else {
088 // The character is not a safe character, and must be escaped ...
089 result.append(ESCAPE_CHARACTER);
090 result.append(Character.toLowerCase(Character.forDigit(c / 16, 16)));
091 result.append(Character.toLowerCase(Character.forDigit(c % 16, 16)));
092 }
093 }
094 return result.toString();
095 }
096
097 /**
098 * {@inheritDoc}
099 */
100 public String decode( String encodedText ) {
101 if (encodedText == null) return null;
102 if (encodedText.length() == 0) return encodedText;
103 final StringBuilder result = new StringBuilder();
104 final CharacterIterator iter = new StringCharacterIterator(encodedText);
105 for (char c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
106 if (c == ESCAPE_CHARACTER) {
107 boolean foundEscapedCharacter = false;
108 // Found the first character in a potential escape sequence, so grab the next two characters ...
109 char hexChar1 = iter.next();
110 char hexChar2 = hexChar1 != CharacterIterator.DONE ? iter.next() : CharacterIterator.DONE;
111 if (hexChar2 != CharacterIterator.DONE) {
112 // We found two more characters, but ensure they form a valid hexadecimal number ...
113 int hexNum1 = Character.digit(hexChar1, 16);
114 int hexNum2 = Character.digit(hexChar2, 16);
115 if (hexNum1 > -1 && hexNum2 > -1) {
116 foundEscapedCharacter = true;
117 result.append((char)(hexNum1 * 16 + hexNum2));
118 }
119 }
120 if (!foundEscapedCharacter) {
121 result.append(c);
122 if (hexChar1 != CharacterIterator.DONE) result.append(hexChar1);
123 if (hexChar2 != CharacterIterator.DONE) result.append(hexChar2);
124 }
125 } else {
126 result.append(c);
127 }
128 }
129 return result.toString();
130 }
131
132 /**
133 * @return slashEncoded
134 */
135 public boolean isSlashEncoded() {
136 return this.slashEncoded;
137 }
138
139 /**
140 * @param slashEncoded Sets slashEncoded to the specified value.
141 * @return this object, for method chaining
142 */
143 public UrlEncoder setSlashEncoded( boolean slashEncoded ) {
144 this.slashEncoded = slashEncoded;
145 return this;
146 }
147
148 }