1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 package org.modeshape.common.xml;
25
26 import java.text.CharacterIterator;
27 import java.text.StringCharacterIterator;
28 import net.jcip.annotations.Immutable;
29
30
31
32
33
34 @Immutable
/**
 * Static classification of characters against several character classes from the XML 1.0 and
 * "Namespaces in XML" specifications.
 * <p>
 * Every character in the Basic Multilingual Plane (U+0000 through U+FFFF) has an entry in a lookup table that
 * holds one bit per character class. The table is filled once in the static initializer and only read
 * afterwards. Supplementary code points (U+10000 through U+10FFFF) are not stored in the table; only
 * {@link #isValid(int)} and {@link #isValidContent(int)} accept them, all other tests return {@code false}.
 * </p>
 * <p>
 * All {@code int} tests return {@code false} for values that are not code points (negative values or values above
 * U+10FFFF) rather than throwing.
 * </p>
 */
public class XmlCharacters {

    /** Number of entries in the lookup table: one per value in the range U+0000 through U+FFFF. */
    private static final int NUMBER_OF_CHARACTERS = 1 << 16;

    /**
     * Lookup table indexed by character value. Each entry is the bitwise OR of the {@code *_CHARACTER} flags that
     * apply to that character. All flags fit in the low 8 bits, so a {@code char} element is wide enough.
     */
    private static final char[] MASKS = new char[NUMBER_OF_CHARACTERS];

    /** Flag: the character matches the XML {@code Char} production. */
    private static final int VALID_CHARACTER = 1;
    /**
     * Flag: the character is a {@code Char} other than {@code '<'}, {@code '&'}, line feed, carriage return and
     * {@code ']'}.
     */
    private static final int CONTENT_CHARACTER = 1 << 1;
    /** Flag: the character matches the XML {@code S} (white space) production. */
    private static final int SPACE_CHARACTER = 1 << 2;
    /** Flag: the character may start an XML {@code Name}. */
    private static final int NAME_START_CHARACTER = 1 << 3;
    /** Flag: the character may appear anywhere in an XML {@code Name}. */
    private static final int NAME_CHARACTER = 1 << 4;
    /** Flag: the character may start an {@code NCName} (a name-start character other than {@code ':'}). */
    private static final int NCNAME_START_CHARACTER = 1 << 5;
    /** Flag: the character may appear anywhere in an {@code NCName} (a name character other than {@code ':'}). */
    private static final int NCNAME_CHARACTER = 1 << 6;
    /** Flag: the character matches the XML {@code PubidChar} production. */
    private static final int PUBID_CHARACTER = 1 << 7;

    static {
        // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
        // The supplementary range is not stored here; isValid and isValidContent test it arithmetically.
        final int validAndContent = VALID_CHARACTER | CONTENT_CHARACTER;
        mark(0x9, validAndContent);
        mark(0xA, validAndContent);
        mark(0xD, validAndContent);
        markRange(0x20, 0xD7FF, validAndContent);
        markRange(0xE000, 0xFFFD, validAndContent);

        // Content characters are the valid characters minus the ones below. This must run after the block above.
        // NOTE(review): excluding line ends and ']' presumably lets callers treat line breaks and "]]>" specially;
        // confirm against the callers of isValidContent.
        clear('<', CONTENT_CHARACTER);
        clear('&', CONTENT_CHARACTER);
        clear('\n', CONTENT_CHARACTER);
        clear('\r', CONTENT_CHARACTER);
        clear(']', CONTENT_CHARACTER);

        // S ::= (#x20 | #x9 | #xD | #xA)+
        mark(0x20, SPACE_CHARACTER);
        mark(0x9, SPACE_CHARACTER);
        mark(0xA, SPACE_CHARACTER);
        mark(0xD, SPACE_CHARACTER);

        // NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D]
        //                 | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF]
        //                 | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
        // Every name-start character is also a name character, hence all four bits.
        // NOTE(review): the supplementary range [#x10000-#xEFFFF] is not represented in this table, so the
        // name tests reject such code points.
        final int nameStartMask = NAME_START_CHARACTER | NCNAME_START_CHARACTER | NAME_CHARACTER | NCNAME_CHARACTER;
        mark(':', nameStartMask);
        mark('_', nameStartMask);
        markRange('A', 'Z', nameStartMask);
        markRange('a', 'z', nameStartMask);
        markRange(0xC0, 0xD6, nameStartMask);
        markRange(0xD8, 0xF6, nameStartMask);
        markRange(0xF8, 0x2FF, nameStartMask);
        markRange(0x370, 0x37D, nameStartMask);
        markRange(0x37F, 0x1FFF, nameStartMask);
        markRange(0x200C, 0x200D, nameStartMask);
        markRange(0x2070, 0x218F, nameStartMask);
        markRange(0x2C00, 0x2FEF, nameStartMask);
        markRange(0x3001, 0xD7FF, nameStartMask);
        markRange(0xF900, 0xFDCF, nameStartMask);
        markRange(0xFDF0, 0xFFFD, nameStartMask);

        // NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
        // These additional characters are allowed inside a name but must NOT be allowed to start one, so only
        // the two "name character" bits are set.
        final int nameMask = NAME_CHARACTER | NCNAME_CHARACTER;
        mark('-', nameMask);
        mark('.', nameMask);
        mark(0xB7, nameMask);
        markRange('0', '9', nameMask);
        markRange(0x0300, 0x036F, nameMask);
        markRange(0x203F, 0x2040, nameMask);

        // An NCName is a Name without colons. This must run after the name masks have been applied to ':'.
        clear(':', NCNAME_START_CHARACTER | NCNAME_CHARACTER);

        // PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
        mark(0x20, PUBID_CHARACTER);
        mark(0xA, PUBID_CHARACTER);
        mark(0xD, PUBID_CHARACTER);
        markRange('A', 'Z', PUBID_CHARACTER);
        markRange('a', 'z', PUBID_CHARACTER);
        markRange('0', '9', PUBID_CHARACTER);
        final String pubidPunctuation = "-'()+,./:=?;!*#@$_%";
        for (int i = 0; i < pubidPunctuation.length(); ++i) {
            mark(pubidPunctuation.charAt(i), PUBID_CHARACTER);
        }
    }

    private XmlCharacters() {
        // Static utility class; not instantiable.
    }

    /** Sets the given flag bits on the table entry for a single character. */
    private static void mark( int c,
                              int mask ) {
        MASKS[c] |= mask;
    }

    /** Sets the given flag bits on every table entry from {@code first} through {@code last}, inclusive. */
    private static void markRange( int first,
                                   int last,
                                   int mask ) {
        for (int i = first; i <= last; ++i) {
            MASKS[i] |= mask;
        }
    }

    /** Clears the given flag bits on the table entry for a single character. */
    private static void clear( int c,
                               int mask ) {
        MASKS[c] &= ~mask;
    }

    /**
     * Tests whether the table entry for {@code c} has any of the given flag bits set. Values outside the table
     * (negative, or U+10000 and above) never match.
     */
    private static boolean hasMask( int c,
                                    int mask ) {
        return c >= 0 && c < NUMBER_OF_CHARACTERS && (MASKS[c] & mask) != 0;
    }

    /** Tests whether {@code c} lies in the supplementary range U+10000 through U+10FFFF. */
    private static boolean isSupplementary( int c ) {
        return 0x10000 <= c && c <= 0x10FFFF;
    }

    /**
     * Determine whether the supplied character may be the first character of an XML Name.
     *
     * @param c the character value
     * @return true if the character is a name-start character in the range U+0000 through U+FFFF, or false
     *         otherwise (including for negative values and supplementary code points)
     */
    public static boolean isValidNameStart( int c ) {
        return hasMask(c, NAME_START_CHARACTER);
    }

    /**
     * Determine whether the supplied character may be the first character of an NCName, i.e. it is a name-start
     * character other than {@code ':'}.
     *
     * @param c the character value
     * @return true if the character may start an NCName, or false otherwise (including for negative values and
     *         supplementary code points)
     */
    public static boolean isValidNcNameStart( int c ) {
        return hasMask(c, NCNAME_START_CHARACTER);
    }

    /**
     * Determine whether the supplied character may appear in an XML Name.
     *
     * @param c the character value
     * @return true if the character is a name character in the range U+0000 through U+FFFF, or false otherwise
     *         (including for negative values and supplementary code points)
     */
    public static boolean isValidName( int c ) {
        return hasMask(c, NAME_CHARACTER);
    }

    /**
     * Determine whether the supplied character may appear in an NCName, i.e. it is a name character other than
     * {@code ':'}.
     *
     * @param c the character value
     * @return true if the character may appear in an NCName, or false otherwise (including for negative values
     *         and supplementary code points)
     */
    public static boolean isValidNcName( int c ) {
        return hasMask(c, NCNAME_CHARACTER);
    }

    /**
     * Determine whether the supplied character may appear in a public identifier literal.
     *
     * @param c the character value
     * @return true if the character is a space, line feed, carriage return, ASCII letter, ASCII digit or one of
     *         {@code -'()+,./:=?;!*#@$_%}; false otherwise
     */
    public static boolean isValidPubid( int c ) {
        return hasMask(c, PUBID_CHARACTER);
    }

    /**
     * Determine whether the supplied code point is a legal XML character.
     *
     * @param c the code point
     * @return true if the code point is tab, line feed, carriage return, or lies in one of the ranges
     *         U+0020-U+D7FF, U+E000-U+FFFD or U+10000-U+10FFFF; false otherwise
     */
    public static boolean isValid( int c ) {
        return hasMask(c, VALID_CHARACTER) || isSupplementary(c);
    }

    /**
     * Determine whether the supplied code point is a content character: a legal XML character other than
     * {@code '<'}, {@code '&'}, line feed, carriage return and {@code ']'}.
     *
     * @param c the code point
     * @return true if the code point is a content character, or false otherwise
     */
    public static boolean isValidContent( int c ) {
        return hasMask(c, CONTENT_CHARACTER) || isSupplementary(c);
    }

    /**
     * Determine whether the supplied character is XML white space.
     *
     * @param c the character value
     * @return true if the character is a space, tab, line feed or carriage return; false otherwise
     */
    public static boolean isValidSpace( int c ) {
        return hasMask(c, SPACE_CHARACTER);
    }

    /**
     * Determine whether the supplied string is a valid XML Name: a name-start character followed by zero or more
     * name characters. Each {@code char} of the string is tested individually, so a name containing surrogate
     * pairs is rejected.
     *
     * @param name the string to test; may be null
     * @return true if the string is a non-empty valid Name, or false if it is null, empty or contains a
     *         character that is not allowed at its position
     */
    public static boolean isValidName( String name ) {
        if (name == null || name.length() == 0) {
            return false;
        }
        if (!isValidNameStart(name.charAt(0))) {
            return false;
        }
        // Iterate by index rather than with CharacterIterator: its DONE sentinel is U+FFFF, so a literal U+FFFF
        // in the input would be mistaken for the end of the string. Every name-start character is also a name
        // character, so the first character needs no second check.
        final int length = name.length();
        for (int i = 1; i < length; ++i) {
            if (!isValidName(name.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Determine whether the supplied string is a valid NCName: a Name that contains no {@code ':'}. Each
     * {@code char} of the string is tested individually, so a name containing surrogate pairs is rejected.
     *
     * @param name the string to test; may be null
     * @return true if the string is a non-empty valid NCName, or false if it is null, empty or contains a
     *         character that is not allowed at its position
     */
    public static boolean isValidNcName( String name ) {
        if (name == null || name.length() == 0) {
            return false;
        }
        if (!isValidNcNameStart(name.charAt(0))) {
            return false;
        }
        // Index-based for the same reason as isValidName(String): U+FFFF must not end the scan early.
        final int length = name.length();
        for (int i = 1; i < length; ++i) {
            if (!isValidNcName(name.charAt(i))) {
                return false;
            }
        }
        return true;
    }
}