View Javadoc

1   /*
2    * ModeShape (http://www.modeshape.org)
3    * See the COPYRIGHT.txt file distributed with this work for information
4    * regarding copyright ownership.  Some portions may be licensed
5    * to Red Hat, Inc. under one or more contributor license agreements.
6    * See the AUTHORS.txt file in the distribution for a full listing of 
7    * individual contributors.
8    *
9    * ModeShape is free software. Unless otherwise indicated, all code in ModeShape
10   * is licensed to you under the terms of the GNU Lesser General Public License as
11   * published by the Free Software Foundation; either version 2.1 of
12   * the License, or (at your option) any later version.
13   * 
14   * ModeShape is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17   * Lesser General Public License for more details.
18   *
19   * You should have received a copy of the GNU Lesser General Public
20   * License along with this software; if not, write to the Free
21   * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
22   * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
23   */
24  package org.modeshape.common.text;
25  
26  import java.text.CharacterIterator;
27  import java.text.StringCharacterIterator;
28  import java.util.HashMap;
29  import java.util.Map;
30  import net.jcip.annotations.Immutable;
31  
32  /**
33   * An encoder useful for converting text to be used within XML attribute values. The following translations will be performed:
34   * <table cellspacing="0" cellpadding="1" border="1">
35   * <tr>
36   * <th>Raw (Unencoded)<br/>Character</th>
37   * <th>Translated (Encoded)<br/>Entity</th>
38   * </tr>
39   * <tr>
40   * <td>&amp;</td>
41   * <td>&amp;amp;</td>
42   * </tr>
43   * <tr>
44   * <td>&lt;</td>
45   * <td>&amp;lt;</td>
46   * </tr>
47   * <tr>
48   * <td>&gt;</td>
49   * <td>&amp;gt;</td>
50   * </tr>
51   * <tr>
52   * <td>&quot;</td>
53   * <td>&amp;quot;</td>
54   * </tr>
55   * <tr>
56   * <td>&#039;</td>
57   * <td>&amp;#039;</td>
58   * </tr>
59   * <tr>
60   * <td>All Others</td>
61   * <td>No Translation</td>
62   * </tr>
63   * </table>
64   * </p>
65   */
66  @Immutable
67  public class XmlValueEncoder implements TextEncoder, TextDecoder {
68  
69      private static final Map<String, Character> SPECIAL_ENTITIES;
70  
71      static {
72          SPECIAL_ENTITIES = new HashMap<String, Character>();
73  
74          SPECIAL_ENTITIES.put("quot", '"');
75          SPECIAL_ENTITIES.put("gt", '>');
76          SPECIAL_ENTITIES.put("lt", '<');
77          SPECIAL_ENTITIES.put("amp", '&');
78  
79      }
80  
81      /**
82       * {@inheritDoc}
83       * 
84       * @see org.modeshape.common.text.TextEncoder#encode(java.lang.String)
85       */
86      public String encode( String text ) {
87          if (text == null) return null;
88          StringBuilder sb = new StringBuilder();
89          CharacterIterator iter = new StringCharacterIterator(text);
90          for (char c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
91              switch (c) {
92                  case '&':
93                      sb.append("&amp;");
94                      break;
95                  case '"':
96                      sb.append("&quot;");
97                      break;
98                  case '<':
99                      sb.append("&lt;");
100                     break;
101                 case '>':
102                     sb.append("&gt;");
103                     break;
104                 case '\'':
105                     sb.append("&#039;");
106                     break;
107                 default:
108                     sb.append(c);
109             }
110         }
111         return sb.toString();
112     }
113 
114     /**
115      * {@inheritDoc}
116      * 
117      * @see org.modeshape.common.text.TextDecoder#decode(java.lang.String)
118      */
119     public String decode( String encodedText ) {
120         if (encodedText == null) return null;
121         StringBuilder sb = new StringBuilder();
122         CharacterIterator iter = new StringCharacterIterator(encodedText);
123         for (char c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
124             if (c == '&') {
125                 int index = iter.getIndex();
126 
127                 do {
128                     c = iter.next();
129                 } while (c != CharacterIterator.DONE && c != ';');
130 
131                 // We found a closing semicolon
132                 if (c == ';') {
133                     String s = encodedText.substring(index + 1, iter.getIndex());
134 
135                     if (SPECIAL_ENTITIES.containsKey(s)) {
136                         sb.append(SPECIAL_ENTITIES.get(s));
137                         continue;
138 
139                     }
140 
141                     if (s.length() > 0 && s.charAt(0) == '#') {
142                         try {
143                             sb.append((char)Short.parseShort(s.substring(1, s.length())));
144                             continue;
145                         } catch (NumberFormatException nfe) {
146                             // This is possible in malformed encodings, but let it fall through
147                         }
148                     }
149                 }
150 
151                 // Malformed encoding, restore state and pass poorly encoded data back
152                 c = '&';
153                 iter.setIndex(index);
154             }
155 
156             sb.append(c);
157 
158         }
159         return sb.toString();
160     }
161 }