View Javadoc

1   /*
2    * ModeShape (http://www.modeshape.org)
3    * See the COPYRIGHT.txt file distributed with this work for information
4    * regarding copyright ownership.  Some portions may be licensed
5    * to Red Hat, Inc. under one or more contributor license agreements.
6    * See the AUTHORS.txt file in the distribution for a full listing of 
7    * individual contributors. 
8    *
9    * ModeShape is free software. Unless otherwise indicated, all code in ModeShape
10   * is licensed to you under the terms of the GNU Lesser General Public License as
11   * published by the Free Software Foundation; either version 2.1 of
12   * the License, or (at your option) any later version.
13   *
14   * ModeShape is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17   * Lesser General Public License for more details.
18   *
19   * You should have received a copy of the GNU Lesser General Public
20   * License along with this software; if not, write to the Free
21   * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
22   * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
23   */
24  package org.modeshape.jdbc.util;
25  
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.BitSet;

import org.modeshape.jdbc.util.TextDecoder;
import org.modeshape.jdbc.util.TextEncoder;
import org.modeshape.jdbc.util.UrlEncoder;
33  
34  /**
35   * An encoder useful for converting text to be used within a URL, as defined by Section 2.3 of <a
36   * href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>. Note that this class does not encode a complete URL (
37   * {@link java.net.URLEncoder} and {@link java.net.URLDecoder} should be used for such purposes).
38   */
39  public class UrlEncoder implements TextEncoder, TextDecoder {
40  
41      /**
42       * Data characters that are allowed in a URI but do not have a reserved purpose are called unreserved. These include upper and
43       * lower case letters, decimal digits, and a limited set of punctuation marks and symbols.
44       * 
45       * <pre>
46       * unreserved  = alphanum | mark
47       * mark        = &quot;-&quot; | &quot;_&quot; | &quot;.&quot; | &quot;!&quot; | &quot;&tilde;&quot; | &quot;*&quot; | &quot;'&quot; | &quot;(&quot; | &quot;)&quot;
48       * </pre>
49       * 
50       * Unreserved characters can be escaped without changing the semantics of the URI, but this should not be done unless the URI
51       * is being used in a context that does not allow the unescaped character to appear.
52       */
53      private static final BitSet RFC2396_UNRESERVED_CHARACTERS = new BitSet(256);
54      private static final BitSet RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS;
55  
56      public static final char ESCAPE_CHARACTER = '%';
57  
58      static {
59          RFC2396_UNRESERVED_CHARACTERS.set('a', 'z' + 1);
60          RFC2396_UNRESERVED_CHARACTERS.set('A', 'Z' + 1);
61          RFC2396_UNRESERVED_CHARACTERS.set('0', '9' + 1);
62          RFC2396_UNRESERVED_CHARACTERS.set('-');
63          RFC2396_UNRESERVED_CHARACTERS.set('_');
64          RFC2396_UNRESERVED_CHARACTERS.set('.');
65          RFC2396_UNRESERVED_CHARACTERS.set('!');
66          RFC2396_UNRESERVED_CHARACTERS.set('~');
67          RFC2396_UNRESERVED_CHARACTERS.set('*');
68          RFC2396_UNRESERVED_CHARACTERS.set('\'');
69          RFC2396_UNRESERVED_CHARACTERS.set('(');
70          RFC2396_UNRESERVED_CHARACTERS.set(')');
71  
72          RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS = (BitSet)RFC2396_UNRESERVED_CHARACTERS.clone();
73          RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS.set('/');
74      }
75  
76      private boolean slashEncoded = true;
77  
78      /**
79       * {@inheritDoc}
80       */
81      public String encode( String text ) {
82          if (text == null) return null;
83          if (text.length() == 0) return text;
84          return encode(text, isSlashEncoded() ? RFC2396_UNRESERVED_CHARACTERS : RFC2396_UNRESERVED_WITH_SLASH_CHARACTERS);
85      }
86  
87      protected String encode( String text,
88                               BitSet safeChars ) {
89          final StringBuilder result = new StringBuilder();
90          final CharacterIterator iter = new StringCharacterIterator(text);
91          for (char c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
92              if (safeChars.get(c)) {
93                  // Safe character, so just pass through ...
94                  result.append(c);
95              } else {
96                  // The character is not a safe character, and must be escaped ...
97                  result.append(ESCAPE_CHARACTER);
98                  result.append(Character.toLowerCase(Character.forDigit(c / 16, 16)));
99                  result.append(Character.toLowerCase(Character.forDigit(c % 16, 16)));
100             }
101         }
102         return result.toString();
103     }
104 
105     /**
106      * {@inheritDoc}
107      */
108     public String decode( String encodedText ) {
109         if (encodedText == null) return null;
110         if (encodedText.length() == 0) return encodedText;
111         final StringBuilder result = new StringBuilder();
112         final CharacterIterator iter = new StringCharacterIterator(encodedText);
113         for (char c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
114             if (c == ESCAPE_CHARACTER) {
115                 boolean foundEscapedCharacter = false;
116                 // Found the first character in a potential escape sequence, so grab the next two characters ...
117                 char hexChar1 = iter.next();
118                 char hexChar2 = hexChar1 != CharacterIterator.DONE ? iter.next() : CharacterIterator.DONE;
119                 if (hexChar2 != CharacterIterator.DONE) {
120                     // We found two more characters, but ensure they form a valid hexadecimal number ...
121                     int hexNum1 = Character.digit(hexChar1, 16);
122                     int hexNum2 = Character.digit(hexChar2, 16);
123                     if (hexNum1 > -1 && hexNum2 > -1) {
124                         foundEscapedCharacter = true;
125                         result.append((char)(hexNum1 * 16 + hexNum2));
126                     }
127                 }
128                 if (!foundEscapedCharacter) {
129                     result.append(c);
130                     if (hexChar1 != CharacterIterator.DONE) result.append(hexChar1);
131                     if (hexChar2 != CharacterIterator.DONE) result.append(hexChar2);
132                 }
133             } else {
134                 result.append(c);
135             }
136         }
137         return result.toString();
138     }
139 
140     /**
141      * @return slashEncoded
142      */
143     public boolean isSlashEncoded() {
144         return this.slashEncoded;
145     }
146 
147     /**
148      * @param slashEncoded Sets slashEncoded to the specified value.
149      * @return this object, for method chaining
150      */
151     public UrlEncoder setSlashEncoded( boolean slashEncoded ) {
152         this.slashEncoded = slashEncoded;
153         return this;
154     }
155 
156 }