View Javadoc

1   /*
2    * ModeShape (http://www.modeshape.org)
3    * See the COPYRIGHT.txt file distributed with this work for information
4    * regarding copyright ownership.  Some portions may be licensed
5    * to Red Hat, Inc. under one or more contributor license agreements.
6    * See the AUTHORS.txt file in the distribution for a full listing of 
7    * individual contributors. 
8    *
9    * ModeShape is free software. Unless otherwise indicated, all code in ModeShape
10   * is licensed to you under the terms of the GNU Lesser General Public License as
11   * published by the Free Software Foundation; either version 2.1 of
12   * the License, or (at your option) any later version.
13   *
14   * ModeShape is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17   * Lesser General Public License for more details.
18   *
19   * You should have received a copy of the GNU Lesser General Public
20   * License along with this software; if not, write to the Free
21   * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
22   * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
23   */
24  package org.modeshape.common.text;
25  
26  import java.text.CharacterIterator;
27  import java.text.StringCharacterIterator;
28  import java.util.BitSet;
29  import net.jcip.annotations.Immutable;
30  
31  /**
32   * An encoder useful for converting text to be used within a URL, as defined by Section 2.3 of <a
33   * href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>. Note that this class does not encode a complete URL (
34   * {@link java.net.URLEncoder} and {@link java.net.URLDecoder} should be used for such purposes).
35   */
36  @Immutable
37  public class UrlEncoder implements TextEncoder, TextDecoder {
38  
39      /**
40       * Data characters that are allowed in a URI but do not have a reserved purpose are called unreserved. These include upper and
41       * lower case letters, decimal digits, and a limited set of punctuation marks and symbols.
42       * 
43       * <pre>
44       * unreserved  = alphanum | mark
45       * mark        = &quot;-&quot; | &quot;_&quot; | &quot;.&quot; | &quot;!&quot; | &quot;&tilde;&quot; | &quot;*&quot; | &quot;'&quot; | &quot;(&quot; | &quot;)&quot;
46       * </pre>
47       * 
48       * Unreserved characters can be escaped without changing the semantics of the URI, but this should not be done unless the URI
49       * is being used in a context that does not allow the unescaped character to appear.
50       */
51      private static final BitSet RFC2396_UNRESERVED_CHARACTERS = new BitSet(256);
52      private static final BitSet RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS;
53  
54      public static final char ESCAPE_CHARACTER = '%';
55  
56      static {
57          RFC2396_UNRESERVED_CHARACTERS.set('a', 'z' + 1);
58          RFC2396_UNRESERVED_CHARACTERS.set('A', 'Z' + 1);
59          RFC2396_UNRESERVED_CHARACTERS.set('0', '9' + 1);
60          RFC2396_UNRESERVED_CHARACTERS.set('-');
61          RFC2396_UNRESERVED_CHARACTERS.set('_');
62          RFC2396_UNRESERVED_CHARACTERS.set('.');
63          RFC2396_UNRESERVED_CHARACTERS.set('!');
64          RFC2396_UNRESERVED_CHARACTERS.set('~');
65          RFC2396_UNRESERVED_CHARACTERS.set('*');
66          RFC2396_UNRESERVED_CHARACTERS.set('\'');
67          RFC2396_UNRESERVED_CHARACTERS.set('(');
68          RFC2396_UNRESERVED_CHARACTERS.set(')');
69  
70          RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS = (BitSet)RFC2396_UNRESERVED_CHARACTERS.clone();
71          RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS.set('/');
72      }
73  
74      private boolean slashEncoded = true;
75  
76      /**
77       * {@inheritDoc}
78       */
79      public String encode( String text ) {
80          if (text == null) return null;
81          if (text.length() == 0) return text;
82          return encode(text, isSlashEncoded() ? RFC2396_UNRESERVED_CHARACTERS : RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS);
83      }
84  
85      protected String encode( String text,
86                               BitSet safeChars ) {
87          final StringBuilder result = new StringBuilder();
88          final CharacterIterator iter = new StringCharacterIterator(text);
89          for (char c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
90              if (safeChars.get(c)) {
91                  // Safe character, so just pass through ...
92                  result.append(c);
93              } else {
94                  // The character is not a safe character, and must be escaped ...
95                  result.append(ESCAPE_CHARACTER);
96                  result.append(Character.toLowerCase(Character.forDigit(c / 16, 16)));
97                  result.append(Character.toLowerCase(Character.forDigit(c % 16, 16)));
98              }
99          }
100         return result.toString();
101     }
102 
103     /**
104      * {@inheritDoc}
105      */
106     public String decode( String encodedText ) {
107         if (encodedText == null) return null;
108         if (encodedText.length() == 0) return encodedText;
109         final StringBuilder result = new StringBuilder();
110         final CharacterIterator iter = new StringCharacterIterator(encodedText);
111         for (char c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
112             if (c == ESCAPE_CHARACTER) {
113                 boolean foundEscapedCharacter = false;
114                 // Found the first character in a potential escape sequence, so grab the next two characters ...
115                 char hexChar1 = iter.next();
116                 char hexChar2 = hexChar1 != CharacterIterator.DONE ? iter.next() : CharacterIterator.DONE;
117                 if (hexChar2 != CharacterIterator.DONE) {
118                     // We found two more characters, but ensure they form a valid hexadecimal number ...
119                     int hexNum1 = Character.digit(hexChar1, 16);
120                     int hexNum2 = Character.digit(hexChar2, 16);
121                     if (hexNum1 > -1 && hexNum2 > -1) {
122                         foundEscapedCharacter = true;
123                         result.append((char)(hexNum1 * 16 + hexNum2));
124                     }
125                 }
126                 if (!foundEscapedCharacter) {
127                     result.append(c);
128                     if (hexChar1 != CharacterIterator.DONE) result.append(hexChar1);
129                     if (hexChar2 != CharacterIterator.DONE) result.append(hexChar2);
130                 }
131             } else {
132                 result.append(c);
133             }
134         }
135         return result.toString();
136     }
137 
138     /**
139      * @return slashEncoded
140      */
141     public boolean isSlashEncoded() {
142         return this.slashEncoded;
143     }
144 
145     /**
146      * @param slashEncoded Sets slashEncoded to the specified value.
147      * @return this object, for method chaining
148      */
149     public UrlEncoder setSlashEncoded( boolean slashEncoded ) {
150         this.slashEncoded = slashEncoded;
151         return this;
152     }
153 
154 }