1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 package org.modeshape.common.text;
25
26 import java.text.CharacterIterator;
27 import java.text.StringCharacterIterator;
28 import java.util.BitSet;
29 import net.jcip.annotations.Immutable;
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45 @Immutable
46 public class XmlNameEncoder implements TextDecoder, TextEncoder {
47
48 private static final BitSet XML_NAME_ALLOWED_CHARACTERS = new BitSet(2 ^ 16);
49
50 static {
51
52
53
54 XML_NAME_ALLOWED_CHARACTERS.set('.');
55 XML_NAME_ALLOWED_CHARACTERS.set('-');
56 XML_NAME_ALLOWED_CHARACTERS.set('_');
57 XML_NAME_ALLOWED_CHARACTERS.set(':');
58
59
60 XML_NAME_ALLOWED_CHARACTERS.set('\u0041', '\u005A' + 1);
61 XML_NAME_ALLOWED_CHARACTERS.set('\u0061', '\u007A' + 1);
62 XML_NAME_ALLOWED_CHARACTERS.set('\u00C0', '\u00D6' + 1);
63 XML_NAME_ALLOWED_CHARACTERS.set('\u00D8', '\u00F6' + 1);
64 XML_NAME_ALLOWED_CHARACTERS.set('\u00F8', '\u00FF' + 1);
65 XML_NAME_ALLOWED_CHARACTERS.set('\u0100', '\u0131' + 1);
66 XML_NAME_ALLOWED_CHARACTERS.set('\u0134', '\u013E' + 1);
67 XML_NAME_ALLOWED_CHARACTERS.set('\u0141', '\u0148' + 1);
68 XML_NAME_ALLOWED_CHARACTERS.set('\u014A', '\u017E' + 1);
69 XML_NAME_ALLOWED_CHARACTERS.set('\u0180', '\u01C3' + 1);
70 XML_NAME_ALLOWED_CHARACTERS.set('\u01CD', '\u01F0' + 1);
71 XML_NAME_ALLOWED_CHARACTERS.set('\u01F4', '\u01F5' + 1);
72 XML_NAME_ALLOWED_CHARACTERS.set('\u01FA', '\u0217' + 1);
73 XML_NAME_ALLOWED_CHARACTERS.set('\u0250', '\u02A8' + 1);
74 XML_NAME_ALLOWED_CHARACTERS.set('\u02BB', '\u02C1' + 1);
75 XML_NAME_ALLOWED_CHARACTERS.set('\u0386');
76 XML_NAME_ALLOWED_CHARACTERS.set('\u0388', '\u038A' + 1);
77 XML_NAME_ALLOWED_CHARACTERS.set('\u038C');
78 XML_NAME_ALLOWED_CHARACTERS.set('\u038E', '\u03A1' + 1);
79 XML_NAME_ALLOWED_CHARACTERS.set('\u03A3', '\u03CE' + 1);
80 XML_NAME_ALLOWED_CHARACTERS.set('\u03D0', '\u03D6' + 1);
81 XML_NAME_ALLOWED_CHARACTERS.set('\u03DA');
82 XML_NAME_ALLOWED_CHARACTERS.set('\u03DC');
83 XML_NAME_ALLOWED_CHARACTERS.set('\u03DE');
84 XML_NAME_ALLOWED_CHARACTERS.set('\u03E0');
85 XML_NAME_ALLOWED_CHARACTERS.set('\u03E2', '\u03F3' + 1);
86 XML_NAME_ALLOWED_CHARACTERS.set('\u0401', '\u040C' + 1);
87 XML_NAME_ALLOWED_CHARACTERS.set('\u040E', '\u044F' + 1);
88 XML_NAME_ALLOWED_CHARACTERS.set('\u0451', '\u045C' + 1);
89 XML_NAME_ALLOWED_CHARACTERS.set('\u045E', '\u0481' + 1);
90 XML_NAME_ALLOWED_CHARACTERS.set('\u0490', '\u04C4' + 1);
91 XML_NAME_ALLOWED_CHARACTERS.set('\u04C7', '\u04C8' + 1);
92 XML_NAME_ALLOWED_CHARACTERS.set('\u04CB', '\u04CC' + 1);
93 XML_NAME_ALLOWED_CHARACTERS.set('\u04D0', '\u04EB' + 1);
94 XML_NAME_ALLOWED_CHARACTERS.set('\u04EE', '\u04F5' + 1);
95 XML_NAME_ALLOWED_CHARACTERS.set('\u04F8', '\u04F9' + 1);
96 XML_NAME_ALLOWED_CHARACTERS.set('\u0531', '\u0556' + 1);
97 XML_NAME_ALLOWED_CHARACTERS.set('\u0559');
98 XML_NAME_ALLOWED_CHARACTERS.set('\u0561', '\u0586' + 1);
99 XML_NAME_ALLOWED_CHARACTERS.set('\u05D0', '\u05EA' + 1);
100 XML_NAME_ALLOWED_CHARACTERS.set('\u05F0', '\u05F2' + 1);
101 XML_NAME_ALLOWED_CHARACTERS.set('\u0621', '\u063A' + 1);
102 XML_NAME_ALLOWED_CHARACTERS.set('\u0641', '\u064A' + 1);
103 XML_NAME_ALLOWED_CHARACTERS.set('\u0671', '\u06B7' + 1);
104 XML_NAME_ALLOWED_CHARACTERS.set('\u06BA', '\u06BE' + 1);
105 XML_NAME_ALLOWED_CHARACTERS.set('\u06C0', '\u06CE' + 1);
106 XML_NAME_ALLOWED_CHARACTERS.set('\u06D0', '\u06D3' + 1);
107 XML_NAME_ALLOWED_CHARACTERS.set('\u06D5');
108 XML_NAME_ALLOWED_CHARACTERS.set('\u06E5', '\u06E6' + 1);
109 XML_NAME_ALLOWED_CHARACTERS.set('\u0905', '\u0939' + 1);
110 XML_NAME_ALLOWED_CHARACTERS.set('\u093D');
111 XML_NAME_ALLOWED_CHARACTERS.set('\u0958', '\u0961' + 1);
112 XML_NAME_ALLOWED_CHARACTERS.set('\u0985', '\u098C' + 1);
113 XML_NAME_ALLOWED_CHARACTERS.set('\u098F', '\u0990' + 1);
114 XML_NAME_ALLOWED_CHARACTERS.set('\u0993', '\u09A8' + 1);
115 XML_NAME_ALLOWED_CHARACTERS.set('\u09AA', '\u09B0' + 1);
116 XML_NAME_ALLOWED_CHARACTERS.set('\u09B2');
117 XML_NAME_ALLOWED_CHARACTERS.set('\u09B6', '\u09B9' + 1);
118 XML_NAME_ALLOWED_CHARACTERS.set('\u09DC', '\u09DD' + 1);
119 XML_NAME_ALLOWED_CHARACTERS.set('\u09DF', '\u09E1' + 1);
120 XML_NAME_ALLOWED_CHARACTERS.set('\u09F0', '\u09F1' + 1);
121 XML_NAME_ALLOWED_CHARACTERS.set('\u0A05', '\u0A0A' + 1);
122 XML_NAME_ALLOWED_CHARACTERS.set('\u0A0F', '\u0A10' + 1);
123 XML_NAME_ALLOWED_CHARACTERS.set('\u0A13', '\u0A28' + 1);
124 XML_NAME_ALLOWED_CHARACTERS.set('\u0A2A', '\u0A30' + 1);
125 XML_NAME_ALLOWED_CHARACTERS.set('\u0A32', '\u0A33' + 1);
126 XML_NAME_ALLOWED_CHARACTERS.set('\u0A35', '\u0A36' + 1);
127 XML_NAME_ALLOWED_CHARACTERS.set('\u0A38', '\u0A39' + 1);
128 XML_NAME_ALLOWED_CHARACTERS.set('\u0A59', '\u0A5C' + 1);
129 XML_NAME_ALLOWED_CHARACTERS.set('\u0A5E');
130 XML_NAME_ALLOWED_CHARACTERS.set('\u0A72', '\u0A74' + 1);
131 XML_NAME_ALLOWED_CHARACTERS.set('\u0A85', '\u0A8B' + 1);
132 XML_NAME_ALLOWED_CHARACTERS.set('\u0A8D');
133 XML_NAME_ALLOWED_CHARACTERS.set('\u0A8F', '\u0A91' + 1);
134 XML_NAME_ALLOWED_CHARACTERS.set('\u0A93', '\u0AA8' + 1);
135 XML_NAME_ALLOWED_CHARACTERS.set('\u0AAA', '\u0AB0' + 1);
136 XML_NAME_ALLOWED_CHARACTERS.set('\u0AB2', '\u0AB3' + 1);
137 XML_NAME_ALLOWED_CHARACTERS.set('\u0AB5', '\u0AB9' + 1);
138 XML_NAME_ALLOWED_CHARACTERS.set('\u0ABD');
139 XML_NAME_ALLOWED_CHARACTERS.set('\u0AE0');
140 XML_NAME_ALLOWED_CHARACTERS.set('\u0B05', '\u0B0C' + 1);
141 XML_NAME_ALLOWED_CHARACTERS.set('\u0B0F', '\u0B10' + 1);
142 XML_NAME_ALLOWED_CHARACTERS.set('\u0B13', '\u0B28' + 1);
143 XML_NAME_ALLOWED_CHARACTERS.set('\u0B2A', '\u0B30' + 1);
144 XML_NAME_ALLOWED_CHARACTERS.set('\u0B32', '\u0B33' + 1);
145 XML_NAME_ALLOWED_CHARACTERS.set('\u0B36', '\u0B39' + 1);
146 XML_NAME_ALLOWED_CHARACTERS.set('\u0B3D');
147 XML_NAME_ALLOWED_CHARACTERS.set('\u0B5C', '\u0B5D' + 1);
148 XML_NAME_ALLOWED_CHARACTERS.set('\u0B5F', '\u0B61' + 1);
149 XML_NAME_ALLOWED_CHARACTERS.set('\u0B85', '\u0B8A' + 1);
150 XML_NAME_ALLOWED_CHARACTERS.set('\u0B8E', '\u0B90' + 1);
151 XML_NAME_ALLOWED_CHARACTERS.set('\u0B92', '\u0B95' + 1);
152 XML_NAME_ALLOWED_CHARACTERS.set('\u0B99', '\u0B9A' + 1);
153 XML_NAME_ALLOWED_CHARACTERS.set('\u0B9C');
154 XML_NAME_ALLOWED_CHARACTERS.set('\u0B9E', '\u0B9F' + 1);
155 XML_NAME_ALLOWED_CHARACTERS.set('\u0BA3', '\u0BA4' + 1);
156 XML_NAME_ALLOWED_CHARACTERS.set('\u0BA8', '\u0BAA' + 1);
157 XML_NAME_ALLOWED_CHARACTERS.set('\u0BAE', '\u0BB5' + 1);
158 XML_NAME_ALLOWED_CHARACTERS.set('\u0BB7', '\u0BB9' + 1);
159 XML_NAME_ALLOWED_CHARACTERS.set('\u0C05', '\u0C0C' + 1);
160 XML_NAME_ALLOWED_CHARACTERS.set('\u0C0E', '\u0C10' + 1);
161 XML_NAME_ALLOWED_CHARACTERS.set('\u0C12', '\u0C28' + 1);
162 XML_NAME_ALLOWED_CHARACTERS.set('\u0C2A', '\u0C33' + 1);
163 XML_NAME_ALLOWED_CHARACTERS.set('\u0C35', '\u0C39' + 1);
164 XML_NAME_ALLOWED_CHARACTERS.set('\u0C60', '\u0C61' + 1);
165 XML_NAME_ALLOWED_CHARACTERS.set('\u0C85', '\u0C8C' + 1);
166 XML_NAME_ALLOWED_CHARACTERS.set('\u0C8E', '\u0C90' + 1);
167 XML_NAME_ALLOWED_CHARACTERS.set('\u0C92', '\u0CA8' + 1);
168 XML_NAME_ALLOWED_CHARACTERS.set('\u0CAA', '\u0CB3' + 1);
169 XML_NAME_ALLOWED_CHARACTERS.set('\u0CB5', '\u0CB9' + 1);
170 XML_NAME_ALLOWED_CHARACTERS.set('\u0CDE');
171 XML_NAME_ALLOWED_CHARACTERS.set('\u0CE0', '\u0CE1' + 1);
172 XML_NAME_ALLOWED_CHARACTERS.set('\u0D05', '\u0D0C' + 1);
173 XML_NAME_ALLOWED_CHARACTERS.set('\u0D0E', '\u0D10' + 1);
174 XML_NAME_ALLOWED_CHARACTERS.set('\u0D12', '\u0D28' + 1);
175 XML_NAME_ALLOWED_CHARACTERS.set('\u0D2A', '\u0D39' + 1);
176 XML_NAME_ALLOWED_CHARACTERS.set('\u0D60', '\u0D61' + 1);
177 XML_NAME_ALLOWED_CHARACTERS.set('\u0E01', '\u0E2E' + 1);
178 XML_NAME_ALLOWED_CHARACTERS.set('\u0E30');
179 XML_NAME_ALLOWED_CHARACTERS.set('\u0E32', '\u0E33' + 1);
180 XML_NAME_ALLOWED_CHARACTERS.set('\u0E40', '\u0E45' + 1);
181 XML_NAME_ALLOWED_CHARACTERS.set('\u0E81', '\u0E82' + 1);
182 XML_NAME_ALLOWED_CHARACTERS.set('\u0E84');
183 XML_NAME_ALLOWED_CHARACTERS.set('\u0E87', '\u0E88' + 1);
184 XML_NAME_ALLOWED_CHARACTERS.set('\u0E8A');
185 XML_NAME_ALLOWED_CHARACTERS.set('\u0E8D');
186 XML_NAME_ALLOWED_CHARACTERS.set('\u0E94', '\u0E97' + 1);
187 XML_NAME_ALLOWED_CHARACTERS.set('\u0E99', '\u0E9F' + 1);
188 XML_NAME_ALLOWED_CHARACTERS.set('\u0EA1', '\u0EA3' + 1);
189 XML_NAME_ALLOWED_CHARACTERS.set('\u0EA5');
190 XML_NAME_ALLOWED_CHARACTERS.set('\u0EA7');
191 XML_NAME_ALLOWED_CHARACTERS.set('\u0EAA', '\u0EAB' + 1);
192 XML_NAME_ALLOWED_CHARACTERS.set('\u0EAD', '\u0EAE' + 1);
193 XML_NAME_ALLOWED_CHARACTERS.set('\u0EB0');
194 XML_NAME_ALLOWED_CHARACTERS.set('\u0EB2', '\u0EB3' + 1);
195 XML_NAME_ALLOWED_CHARACTERS.set('\u0EBD');
196 XML_NAME_ALLOWED_CHARACTERS.set('\u0EC0', '\u0EC4' + 1);
197 XML_NAME_ALLOWED_CHARACTERS.set('\u0F40', '\u0F47' + 1);
198 XML_NAME_ALLOWED_CHARACTERS.set('\u0F49', '\u0F69' + 1);
199 XML_NAME_ALLOWED_CHARACTERS.set('\u10A0', '\u10C5' + 1);
200 XML_NAME_ALLOWED_CHARACTERS.set('\u10D0', '\u10F6' + 1);
201 XML_NAME_ALLOWED_CHARACTERS.set('\u1100');
202 XML_NAME_ALLOWED_CHARACTERS.set('\u1102', '\u1103' + 1);
203 XML_NAME_ALLOWED_CHARACTERS.set('\u1105', '\u1107' + 1);
204 XML_NAME_ALLOWED_CHARACTERS.set('\u1109');
205 XML_NAME_ALLOWED_CHARACTERS.set('\u110B', '\u110C' + 1);
206 XML_NAME_ALLOWED_CHARACTERS.set('\u110E', '\u1112' + 1);
207 XML_NAME_ALLOWED_CHARACTERS.set('\u113C');
208 XML_NAME_ALLOWED_CHARACTERS.set('\u113E');
209 XML_NAME_ALLOWED_CHARACTERS.set('\u1140');
210 XML_NAME_ALLOWED_CHARACTERS.set('\u114C');
211 XML_NAME_ALLOWED_CHARACTERS.set('\u114E');
212 XML_NAME_ALLOWED_CHARACTERS.set('\u1150');
213 XML_NAME_ALLOWED_CHARACTERS.set('\u1154', '\u1155' + 1);
214 XML_NAME_ALLOWED_CHARACTERS.set('\u1159');
215 XML_NAME_ALLOWED_CHARACTERS.set('\u115F', '\u1161' + 1);
216 XML_NAME_ALLOWED_CHARACTERS.set('\u1163');
217 XML_NAME_ALLOWED_CHARACTERS.set('\u1165');
218 XML_NAME_ALLOWED_CHARACTERS.set('\u1167');
219 XML_NAME_ALLOWED_CHARACTERS.set('\u1169');
220 XML_NAME_ALLOWED_CHARACTERS.set('\u116D', '\u116E' + 1);
221 XML_NAME_ALLOWED_CHARACTERS.set('\u1172', '\u1173' + 1);
222 XML_NAME_ALLOWED_CHARACTERS.set('\u1175');
223 XML_NAME_ALLOWED_CHARACTERS.set('\u119E');
224 XML_NAME_ALLOWED_CHARACTERS.set('\u11A8');
225 XML_NAME_ALLOWED_CHARACTERS.set('\u11AB');
226 XML_NAME_ALLOWED_CHARACTERS.set('\u11AE', '\u11AF' + 1);
227 XML_NAME_ALLOWED_CHARACTERS.set('\u11B7', '\u11B8' + 1);
228 XML_NAME_ALLOWED_CHARACTERS.set('\u11BA');
229 XML_NAME_ALLOWED_CHARACTERS.set('\u11BC', '\u11C2' + 1);
230 XML_NAME_ALLOWED_CHARACTERS.set('\u11EB');
231 XML_NAME_ALLOWED_CHARACTERS.set('\u11F0');
232 XML_NAME_ALLOWED_CHARACTERS.set('\u11F9');
233 XML_NAME_ALLOWED_CHARACTERS.set('\u1E00', '\u1E9B' + 1);
234 XML_NAME_ALLOWED_CHARACTERS.set('\u1EA0', '\u1EF9' + 1);
235 XML_NAME_ALLOWED_CHARACTERS.set('\u1F00', '\u1F15' + 1);
236 XML_NAME_ALLOWED_CHARACTERS.set('\u1F18', '\u1F1D' + 1);
237 XML_NAME_ALLOWED_CHARACTERS.set('\u1F20', '\u1F45' + 1);
238 XML_NAME_ALLOWED_CHARACTERS.set('\u1F48', '\u1F4D' + 1);
239 XML_NAME_ALLOWED_CHARACTERS.set('\u1F50', '\u1F57' + 1);
240 XML_NAME_ALLOWED_CHARACTERS.set('\u1F59');
241 XML_NAME_ALLOWED_CHARACTERS.set('\u1F5B');
242 XML_NAME_ALLOWED_CHARACTERS.set('\u1F5D');
243 XML_NAME_ALLOWED_CHARACTERS.set('\u1F5F', '\u1F7D' + 1);
244 XML_NAME_ALLOWED_CHARACTERS.set('\u1F80', '\u1FB4' + 1);
245 XML_NAME_ALLOWED_CHARACTERS.set('\u1FB6', '\u1FBC' + 1);
246 XML_NAME_ALLOWED_CHARACTERS.set('\u1FBE');
247 XML_NAME_ALLOWED_CHARACTERS.set('\u1FC2', '\u1FC4' + 1);
248 XML_NAME_ALLOWED_CHARACTERS.set('\u1FC6', '\u1FCC' + 1);
249 XML_NAME_ALLOWED_CHARACTERS.set('\u1FD0', '\u1FD3' + 1);
250 XML_NAME_ALLOWED_CHARACTERS.set('\u1FD6', '\u1FDB' + 1);
251 XML_NAME_ALLOWED_CHARACTERS.set('\u1FE0', '\u1FEC' + 1);
252 XML_NAME_ALLOWED_CHARACTERS.set('\u1FF2', '\u1FF4' + 1);
253 XML_NAME_ALLOWED_CHARACTERS.set('\u1FF6', '\u1FFC' + 1);
254 XML_NAME_ALLOWED_CHARACTERS.set('\u2126');
255 XML_NAME_ALLOWED_CHARACTERS.set('\u212A', '\u212B' + 1);
256 XML_NAME_ALLOWED_CHARACTERS.set('\u212E');
257 XML_NAME_ALLOWED_CHARACTERS.set('\u2180', '\u2182' + 1);
258 XML_NAME_ALLOWED_CHARACTERS.set('\u3041', '\u3094' + 1);
259 XML_NAME_ALLOWED_CHARACTERS.set('\u30A1', '\u30FA' + 1);
260 XML_NAME_ALLOWED_CHARACTERS.set('\u3105', '\u312C' + 1);
261 XML_NAME_ALLOWED_CHARACTERS.set('\uAC00', '\uD7A3' + 1);
262
263
264
265 XML_NAME_ALLOWED_CHARACTERS.set('\u4E00', '\u9FA5' + 1);
266 XML_NAME_ALLOWED_CHARACTERS.set('\u3007');
267 XML_NAME_ALLOWED_CHARACTERS.set('\u3021', '\u3029' + 1);
268
269
270
271 XML_NAME_ALLOWED_CHARACTERS.set('\u0300', '\u0345' + 1);
272 XML_NAME_ALLOWED_CHARACTERS.set('\u0360', '\u0361' + 1);
273 XML_NAME_ALLOWED_CHARACTERS.set('\u0483', '\u0486' + 1);
274 XML_NAME_ALLOWED_CHARACTERS.set('\u0591', '\u05A1' + 1);
275 XML_NAME_ALLOWED_CHARACTERS.set('\u05A3', '\u05B9' + 1);
276 XML_NAME_ALLOWED_CHARACTERS.set('\u05BB', '\u05BD' + 1);
277 XML_NAME_ALLOWED_CHARACTERS.set('\u05BF');
278 XML_NAME_ALLOWED_CHARACTERS.set('\u05C1', '\u05C2' + 1);
279 XML_NAME_ALLOWED_CHARACTERS.set('\u05C4');
280 XML_NAME_ALLOWED_CHARACTERS.set('\u064B', '\u0652' + 1);
281 XML_NAME_ALLOWED_CHARACTERS.set('\u0670');
282 XML_NAME_ALLOWED_CHARACTERS.set('\u06D6', '\u06DC' + 1);
283 XML_NAME_ALLOWED_CHARACTERS.set('\u06DD', '\u06DF' + 1);
284 XML_NAME_ALLOWED_CHARACTERS.set('\u06E0', '\u06E4' + 1);
285 XML_NAME_ALLOWED_CHARACTERS.set('\u06E7', '\u06E8' + 1);
286 XML_NAME_ALLOWED_CHARACTERS.set('\u06EA', '\u06ED' + 1);
287 XML_NAME_ALLOWED_CHARACTERS.set('\u0901', '\u0903' + 1);
288 XML_NAME_ALLOWED_CHARACTERS.set('\u093C');
289 XML_NAME_ALLOWED_CHARACTERS.set('\u093E', '\u094C' + 1);
290 XML_NAME_ALLOWED_CHARACTERS.set('\u094D');
291 XML_NAME_ALLOWED_CHARACTERS.set('\u0951', '\u0954' + 1);
292 XML_NAME_ALLOWED_CHARACTERS.set('\u0962', '\u0963' + 1);
293 XML_NAME_ALLOWED_CHARACTERS.set('\u0981', '\u0983' + 1);
294 XML_NAME_ALLOWED_CHARACTERS.set('\u09BC');
295 XML_NAME_ALLOWED_CHARACTERS.set('\u09BE');
296 XML_NAME_ALLOWED_CHARACTERS.set('\u09BF');
297 XML_NAME_ALLOWED_CHARACTERS.set('\u09C0', '\u09C4' + 1);
298 XML_NAME_ALLOWED_CHARACTERS.set('\u09C7', '\u09C8' + 1);
299 XML_NAME_ALLOWED_CHARACTERS.set('\u09CB', '\u09CD' + 1);
300 XML_NAME_ALLOWED_CHARACTERS.set('\u09D7');
301 XML_NAME_ALLOWED_CHARACTERS.set('\u09E2', '\u09E3' + 1);
302 XML_NAME_ALLOWED_CHARACTERS.set('\u0A02');
303 XML_NAME_ALLOWED_CHARACTERS.set('\u0A3C');
304 XML_NAME_ALLOWED_CHARACTERS.set('\u0A3E');
305 XML_NAME_ALLOWED_CHARACTERS.set('\u0A3F');
306 XML_NAME_ALLOWED_CHARACTERS.set('\u0A40', '\u0A42' + 1);
307 XML_NAME_ALLOWED_CHARACTERS.set('\u0A47', '\u0A48' + 1);
308 XML_NAME_ALLOWED_CHARACTERS.set('\u0A4B', '\u0A4D' + 1);
309 XML_NAME_ALLOWED_CHARACTERS.set('\u0A70', '\u0A71' + 1);
310 XML_NAME_ALLOWED_CHARACTERS.set('\u0A81', '\u0A83' + 1);
311 XML_NAME_ALLOWED_CHARACTERS.set('\u0ABC');
312 XML_NAME_ALLOWED_CHARACTERS.set('\u0ABE', '\u0AC5' + 1);
313 XML_NAME_ALLOWED_CHARACTERS.set('\u0AC7', '\u0AC9' + 1);
314 XML_NAME_ALLOWED_CHARACTERS.set('\u0ACB', '\u0ACD' + 1);
315 XML_NAME_ALLOWED_CHARACTERS.set('\u0B01', '\u0B03' + 1);
316 XML_NAME_ALLOWED_CHARACTERS.set('\u0B3C');
317 XML_NAME_ALLOWED_CHARACTERS.set('\u0B3E', '\u0B43' + 1);
318 XML_NAME_ALLOWED_CHARACTERS.set('\u0B47', '\u0B48' + 1);
319 XML_NAME_ALLOWED_CHARACTERS.set('\u0B4B', '\u0B4D' + 1);
320 XML_NAME_ALLOWED_CHARACTERS.set('\u0B56', '\u0B57' + 1);
321 XML_NAME_ALLOWED_CHARACTERS.set('\u0B82', '\u0B83' + 1);
322 XML_NAME_ALLOWED_CHARACTERS.set('\u0BBE', '\u0BC2' + 1);
323 XML_NAME_ALLOWED_CHARACTERS.set('\u0BC6', '\u0BC8' + 1);
324 XML_NAME_ALLOWED_CHARACTERS.set('\u0BCA', '\u0BCD' + 1);
325 XML_NAME_ALLOWED_CHARACTERS.set('\u0BD7');
326 XML_NAME_ALLOWED_CHARACTERS.set('\u0C01', '\u0C03' + 1);
327 XML_NAME_ALLOWED_CHARACTERS.set('\u0C3E', '\u0C44' + 1);
328 XML_NAME_ALLOWED_CHARACTERS.set('\u0C46', '\u0C48' + 1);
329 XML_NAME_ALLOWED_CHARACTERS.set('\u0C4A', '\u0C4D' + 1);
330 XML_NAME_ALLOWED_CHARACTERS.set('\u0C55', '\u0C56' + 1);
331 XML_NAME_ALLOWED_CHARACTERS.set('\u0C82', '\u0C83' + 1);
332 XML_NAME_ALLOWED_CHARACTERS.set('\u0CBE', '\u0CC4' + 1);
333 XML_NAME_ALLOWED_CHARACTERS.set('\u0CC6', '\u0CC8' + 1);
334 XML_NAME_ALLOWED_CHARACTERS.set('\u0CCA', '\u0CCD' + 1);
335 XML_NAME_ALLOWED_CHARACTERS.set('\u0CD5', '\u0CD6' + 1);
336 XML_NAME_ALLOWED_CHARACTERS.set('\u0D02', '\u0D03' + 1);
337 XML_NAME_ALLOWED_CHARACTERS.set('\u0D3E', '\u0D43' + 1);
338 XML_NAME_ALLOWED_CHARACTERS.set('\u0D46', '\u0D48' + 1);
339 XML_NAME_ALLOWED_CHARACTERS.set('\u0D4A', '\u0D4D' + 1);
340 XML_NAME_ALLOWED_CHARACTERS.set('\u0D57');
341 XML_NAME_ALLOWED_CHARACTERS.set('\u0E31');
342 XML_NAME_ALLOWED_CHARACTERS.set('\u0E34', '\u0E3A' + 1);
343 XML_NAME_ALLOWED_CHARACTERS.set('\u0E47', '\u0E4E' + 1);
344 XML_NAME_ALLOWED_CHARACTERS.set('\u0EB1');
345 XML_NAME_ALLOWED_CHARACTERS.set('\u0EB4', '\u0EB9' + 1);
346 XML_NAME_ALLOWED_CHARACTERS.set('\u0EBB', '\u0EBC' + 1);
347 XML_NAME_ALLOWED_CHARACTERS.set('\u0EC8', '\u0ECD' + 1);
348 XML_NAME_ALLOWED_CHARACTERS.set('\u0F18', '\u0F19' + 1);
349 XML_NAME_ALLOWED_CHARACTERS.set('\u0F35');
350 XML_NAME_ALLOWED_CHARACTERS.set('\u0F37');
351 XML_NAME_ALLOWED_CHARACTERS.set('\u0F39');
352 XML_NAME_ALLOWED_CHARACTERS.set('\u0F3E');
353 XML_NAME_ALLOWED_CHARACTERS.set('\u0F3F');
354 XML_NAME_ALLOWED_CHARACTERS.set('\u0F71', '\u0F84' + 1);
355 XML_NAME_ALLOWED_CHARACTERS.set('\u0F86', '\u0F8B' + 1);
356 XML_NAME_ALLOWED_CHARACTERS.set('\u0F90', '\u0F95' + 1);
357 XML_NAME_ALLOWED_CHARACTERS.set('\u0F97');
358 XML_NAME_ALLOWED_CHARACTERS.set('\u0F99', '\u0FAD' + 1);
359 XML_NAME_ALLOWED_CHARACTERS.set('\u0FB1', '\u0FB7' + 1);
360 XML_NAME_ALLOWED_CHARACTERS.set('\u0FB9');
361 XML_NAME_ALLOWED_CHARACTERS.set('\u20D0', '\u20DC' + 1);
362 XML_NAME_ALLOWED_CHARACTERS.set('\u20E1');
363 XML_NAME_ALLOWED_CHARACTERS.set('\u302A', '\u302F' + 1);
364 XML_NAME_ALLOWED_CHARACTERS.set('\u3099');
365 XML_NAME_ALLOWED_CHARACTERS.set('\u309A');
366
367
368 XML_NAME_ALLOWED_CHARACTERS.set('\u0030', '\u0039' + 1);
369 XML_NAME_ALLOWED_CHARACTERS.set('\u0660', '\u0669' + 1);
370 XML_NAME_ALLOWED_CHARACTERS.set('\u06F0', '\u06F9' + 1);
371 XML_NAME_ALLOWED_CHARACTERS.set('\u0966', '\u096F' + 1);
372 XML_NAME_ALLOWED_CHARACTERS.set('\u09E6', '\u09EF' + 1);
373 XML_NAME_ALLOWED_CHARACTERS.set('\u0A66', '\u0A6F' + 1);
374 XML_NAME_ALLOWED_CHARACTERS.set('\u0AE6', '\u0AEF' + 1);
375 XML_NAME_ALLOWED_CHARACTERS.set('\u0B66', '\u0B6F' + 1);
376 XML_NAME_ALLOWED_CHARACTERS.set('\u0BE7', '\u0BEF' + 1);
377 XML_NAME_ALLOWED_CHARACTERS.set('\u0C66', '\u0C6F' + 1);
378 XML_NAME_ALLOWED_CHARACTERS.set('\u0CE6', '\u0CEF' + 1);
379 XML_NAME_ALLOWED_CHARACTERS.set('\u0D66', '\u0D6F' + 1);
380 XML_NAME_ALLOWED_CHARACTERS.set('\u0E50', '\u0E59' + 1);
381 XML_NAME_ALLOWED_CHARACTERS.set('\u0ED0', '\u0ED9' + 1);
382 XML_NAME_ALLOWED_CHARACTERS.set('\u0F20', '\u0F29' + 1);
383
384
385 XML_NAME_ALLOWED_CHARACTERS.set('\u00B7');
386 XML_NAME_ALLOWED_CHARACTERS.set('\u02D0');
387 XML_NAME_ALLOWED_CHARACTERS.set('\u02D1');
388 XML_NAME_ALLOWED_CHARACTERS.set('\u0387');
389 XML_NAME_ALLOWED_CHARACTERS.set('\u0640');
390 XML_NAME_ALLOWED_CHARACTERS.set('\u0E46');
391 XML_NAME_ALLOWED_CHARACTERS.set('\u0EC6');
392 XML_NAME_ALLOWED_CHARACTERS.set('\u3005');
393 XML_NAME_ALLOWED_CHARACTERS.set('\u3031', '\u3035' + 1);
394 XML_NAME_ALLOWED_CHARACTERS.set('\u309D', '\u309E' + 1);
395 XML_NAME_ALLOWED_CHARACTERS.set('\u30FC', '\u30FE' + 1);
396 }
397
398
399
400
401
402
403 public String decode( String encodedText ) {
404 if (encodedText == null) return null;
405 if (encodedText.length() < 7) {
406
407 return encodedText;
408 }
409 StringBuilder sb = new StringBuilder();
410 char[] digits = new char[4];
411 CharacterIterator iter = new StringCharacterIterator(encodedText);
412 for (char c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
413 if (c == '_') {
414
415 char next = iter.next();
416 if (next == CharacterIterator.DONE) {
417 sb.append(c);
418 break;
419 }
420
421 if (next != 'x') {
422 sb.append(c).append(next);
423 continue;
424 }
425
426 digits[0] = iter.next();
427 if (digits[0] == CharacterIterator.DONE) {
428 sb.append(c).append(next);
429 break;
430 }
431 digits[1] = iter.next();
432 if (digits[1] == CharacterIterator.DONE) {
433 sb.append(c).append(next).append(digits, 0, 1);
434 break;
435 }
436 digits[2] = iter.next();
437 if (digits[2] == CharacterIterator.DONE) {
438 sb.append(c).append(next).append(digits, 0, 2);
439 break;
440 }
441 digits[3] = iter.next();
442 if (digits[3] == CharacterIterator.DONE) {
443 sb.append(c).append(next).append(digits, 0, 3);
444 break;
445 }
446 char underscore = iter.next();
447 if (underscore != '_') {
448 sb.append(c).append(next).append(digits, 0, 4);
449 if (underscore == CharacterIterator.DONE) break;
450 sb.append(underscore);
451 continue;
452 }
453
454
455 try {
456 sb.appendCodePoint(Integer.parseInt(new String(digits), 16));
457 } catch (NumberFormatException e) {
458
459 sb.append(c).append(next).append(digits).append(underscore);
460 }
461 } else {
462
463 sb.append(c);
464 }
465 }
466 return sb.toString();
467 }
468
469
470
471
472
473
474 public String encode( String text ) {
475 if (text == null) return null;
476 if (text.length() == 0) return text;
477 StringBuilder sb = new StringBuilder();
478 String hex = null;
479 CharacterIterator iter = new StringCharacterIterator(text);
480 for (char c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
481 if (c == '_') {
482
483 char next = iter.next();
484 if (next == CharacterIterator.DONE) {
485 sb.append(c);
486 break;
487 }
488
489 if (next != 'x') {
490 sb.append(c).append(next);
491 continue;
492 }
493
494 sb.append("_x005f_");
495
496 sb.append(next);
497 } else if (XML_NAME_ALLOWED_CHARACTERS.get(c)) {
498
499 sb.append(c);
500 } else {
501
502 hex = Integer.toHexString(c);
503
504 if (c >= '\u0000' && c <= '\u000f') {
505 sb.append("_x000").append(hex);
506 } else if (c >= '\u0010' && c <= '\u00ff') {
507 sb.append("_x00").append(hex);
508 } else if (c >= '\u0100' && c <= '\u0fff') {
509 sb.append("_x0").append(hex);
510 } else {
511 sb.append("_x").append(hex);
512 }
513 sb.append('_');
514 }
515 }
516 return sb.toString();
517 }
518
519 }