001 /*
002 * JBoss, Home of Professional Open Source.
003 * Copyright 2008, Red Hat Middleware LLC, and individual contributors
004 * as indicated by the @author tags. See the copyright.txt file in the
005 * distribution for a full listing of individual contributors.
006 *
007 * This is free software; you can redistribute it and/or modify it
008 * under the terms of the GNU Lesser General Public License as
009 * published by the Free Software Foundation; either version 2.1 of
010 * the License, or (at your option) any later version.
011 *
012 * This software is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * You should have received a copy of the GNU Lesser General Public
018 * License along with this software; if not, write to the Free
019 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
021 */
022 package org.jboss.dna.graph.xml;
023
024 import java.util.ArrayList;
025 import java.util.HashMap;
026 import java.util.LinkedList;
027 import java.util.List;
028 import java.util.Map;
029 import javax.xml.parsers.SAXParser;
030 import net.jcip.annotations.NotThreadSafe;
031 import org.jboss.dna.common.text.TextDecoder;
032 import org.jboss.dna.common.text.XmlNameEncoder;
033 import org.jboss.dna.common.util.CheckArg;
034 import org.jboss.dna.graph.BasicExecutionContext;
035 import org.jboss.dna.graph.ExecutionContext;
036 import org.jboss.dna.graph.properties.Name;
037 import org.jboss.dna.graph.properties.NameFactory;
038 import org.jboss.dna.graph.properties.NamespaceRegistry;
039 import org.jboss.dna.graph.properties.Path;
040 import org.jboss.dna.graph.properties.PathFactory;
041 import org.jboss.dna.graph.properties.Property;
042 import org.jboss.dna.graph.properties.PropertyFactory;
043 import org.jboss.dna.graph.properties.basic.LocalNamespaceRegistry;
044 import org.xml.sax.Attributes;
045 import org.xml.sax.ext.DefaultHandler2;
046
047 /**
048 * A {@link DefaultHandler2} specialization that responds to XML content events by creating the corresponding content in the
049 * supplied graph. This implementation ignores DTD entities, XML contents, and other XML processing instructions. If other
050 * behavior is required, the appropriate methods can be overridden. (Which is why this class extends <code>DefaultHandler2</code>,
051 * which has support for processing all the different parts of XML.
052 * <p>
053 * This class can be passed to the {@link SAXParser}'s {@link SAXParser#parse(java.io.File, org.xml.sax.helpers.DefaultHandler)
054 * parse(..,DefaultHandler)} methods.
055 * </p>
056 *
057 * @author Randall Hauch
058 */
059 @NotThreadSafe
060 public class XmlHandler extends DefaultHandler2 {
061
062 /**
063 * The choices for how attributes that have no namespace prefix should be assigned a namespace.
064 *
065 * @author Randall Hauch
066 */
067 public enum AttributeScoping {
068 /** The attribute's namespace is the default namespace */
069 USE_DEFAULT_NAMESPACE,
070 /** The attribute's namespace is the same namespace as the containing element */
071 INHERIT_ELEMENT_NAMESPACE;
072 }
073
074 private final ExecutionContext context;
075
076 /**
077 * Decoder for XML names, to turn '_xHHHH_' sequences in the XML element and attribute names into the corresponding UTF-16
078 * characters.
079 */
080 public static TextDecoder DEFAULT_DECODER = new XmlNameEncoder();
081
082 /**
083 * The default {@link AttributeScoping}.
084 */
085 public static AttributeScoping DEFAULT_ATTRIBUTE_SCOPING = AttributeScoping.USE_DEFAULT_NAMESPACE;
086
087 /**
088 * The destination where the content should be sent.
089 */
090 protected final Destination destination;
091
092 /**
093 * The name of the XML attribute whose value should be used for the name of the node. For example, "jcr:name".
094 */
095 protected final Name nameAttribute;
096
097 /**
098 * The name of the property that is to be set with the type of the XML element. For example, "jcr:name".
099 */
100 protected final Name typeAttribute;
101
102 /**
103 * The value of the node type property, if the node's name is set with the {@link #nameAttribute}.
104 */
105 protected final Name typeAttributeValue;
106
107 /**
108 * The cached reference to the graph's path factory.
109 */
110 protected final PathFactory pathFactory;
111
112 /**
113 * The cached reference to the graph's name factory.
114 */
115 protected final NameFactory nameFactory;
116
117 /**
118 * The cached reference to the graph's property factory.
119 */
120 protected final PropertyFactory propertyFactory;
121
122 /**
123 * The cached reference to the graph's namespace registry.
124 */
125 protected final NamespaceRegistry namespaceRegistry;
126
127 /**
128 * The TextDecoder that is used to decode the names.
129 */
130 protected final TextDecoder decoder;
131
132 /**
133 * The stack of prefixes for each namespace, which is used to keep the {@link #namespaceRegistry local namespace registry} in
134 * sync with the namespaces in the XML document.
135 */
136 private final Map<String, LinkedList<String>> prefixStackByUri = new HashMap<String, LinkedList<String>>();
137
138 private final AttributeScoping attributeScoping;
139
140 /**
141 * The path for the node representing the current element. This starts out as the path supplied by the constructor, and never
142 * is shorter than that initial path.
143 */
144 protected Path currentPath;
145
146 /**
147 * Flag the records whether the first element should be skipped.
148 */
149 protected boolean skipFirstElement;
150
151 /**
152 * A temporary list used to store the properties for a single node. This is cleared, populated, then used to create the node.
153 */
154 protected final List<Property> properties = new ArrayList<Property>();
155
156 /**
157 * A working array that contains a single value object that is used to create Property objects (without having to create an
158 * array of values for each property).
159 */
160 protected final Object[] propertyValues = new Object[1];
161
162 /**
163 * Create a handler that creates content in the supplied graph
164 *
165 * @param destination the destination where the content should be sent.graph in which the content should be placed
166 * @param skipRootElement true if the root element of the document should be skipped, or false if the root element should be
167 * converted to the top-level node of the content
168 * @param parent the path to the node in the graph under which the content should be placed; if null, the root node is assumed
169 * @param textDecoder the text decoder that should be used to decode the XML element names and XML attribute names, prior to
170 * using those values to create names; or null if the default encoder should be used
171 * @param nameAttribute the name of the property whose value should be used for the names of the nodes (typically, this is
172 * "jcr:name" or something equivalent); or null if the XML element name should always be used as the node name
173 * @param typeAttribute the name of the property that should be set with the type of the XML element, or null if there is no
174 * such property
175 * @param typeAttributeValue the value of the type property that should be used if the node has no <code>nameAttribute</code>,
176 * or null if the value should be set to the type of the XML element
177 * @param scoping defines how to choose the namespace of attributes that do not have a namespace prefix; if null, the
178 * {@link #DEFAULT_ATTRIBUTE_SCOPING} value is used
179 * @throws IllegalArgumentException if the destination reference is null
180 */
181 public XmlHandler( Destination destination,
182 boolean skipRootElement,
183 Path parent,
184 TextDecoder textDecoder,
185 Name nameAttribute,
186 Name typeAttribute,
187 Name typeAttributeValue,
188 AttributeScoping scoping ) {
189 CheckArg.isNotNull(destination, "destination");
190 assert destination != null;
191 this.destination = destination;
192 this.nameAttribute = nameAttribute;
193 this.typeAttribute = typeAttribute;
194 this.typeAttributeValue = typeAttributeValue;
195 this.decoder = textDecoder != null ? textDecoder : DEFAULT_DECODER;
196 this.skipFirstElement = skipRootElement;
197 this.attributeScoping = scoping != null ? scoping : DEFAULT_ATTRIBUTE_SCOPING;
198
199 // Use the execution context ...
200 this.context = destination.getExecutionContext();
201 assert this.context != null;
202
203 // Set up a local namespace registry that is kept in sync with the namespaces found in this XML document ...
204 NamespaceRegistry namespaceRegistry = new LocalNamespaceRegistry(this.context.getNamespaceRegistry());
205 final ExecutionContext localContext = new BasicExecutionContext(this.context, namespaceRegistry);
206
207 // Set up references to frequently-used objects in the context ...
208 this.nameFactory = localContext.getValueFactories().getNameFactory();
209 this.pathFactory = localContext.getValueFactories().getPathFactory();
210 this.propertyFactory = localContext.getPropertyFactory();
211 this.namespaceRegistry = localContext.getNamespaceRegistry();
212 assert this.nameFactory != null;
213 assert this.pathFactory != null;
214 assert this.propertyFactory != null;
215 assert this.namespaceRegistry != null;
216
217 // Set up the initial path ...
218 this.currentPath = parent != null ? parent : this.pathFactory.createRootPath();
219 assert this.currentPath != null;
220 }
221
222 /**
223 * {@inheritDoc}
224 * <p>
225 * This method ensures that the namespace is registered with the {@link NamespaceRegistry registry}, using the supplied prefix
226 * to register the namespace if required. Note that because this class does not really use the namespace prefixes to create
227 * {@link Name} objects, no attempt is made to match the XML namespace prefixes.
228 * </p>
229 *
230 * @see org.xml.sax.helpers.DefaultHandler#startPrefixMapping(java.lang.String, java.lang.String)
231 */
232 @Override
233 public void startPrefixMapping( String prefix,
234 String uri ) {
235 assert uri != null;
236 // Add the prefix to the stack ...
237 LinkedList<String> prefixStack = this.prefixStackByUri.get(uri);
238 if (prefixStack == null) {
239 prefixStack = new LinkedList<String>();
240 this.prefixStackByUri.put(uri, prefixStack);
241 }
242 prefixStack.addFirst(prefix);
243
244 // If the namespace is already registered, then we'll have to register it in the context's registry, too.
245 if (!namespaceRegistry.isRegisteredNamespaceUri(uri)) {
246 // The namespace is not already registered (locally or in the context's registry), so we have to
247 // register it with the context's registry (which the local register then inherits).
248 NamespaceRegistry contextRegistry = context.getNamespaceRegistry();
249 if (contextRegistry.getNamespaceForPrefix(prefix) != null) {
250 // The prefix is already bound, so register and generate a unique prefix
251 context.getNamespaceRegistry().getPrefixForNamespaceUri(uri, true);
252 // Now register locally with the supplied prefix ...
253 namespaceRegistry.register(prefix, uri);
254 } else {
255 context.getNamespaceRegistry().register(prefix, uri);
256 }
257 } else {
258 // It is already registered, but re-register it locally using the supplied prefix ...
259 namespaceRegistry.register(prefix, uri);
260 }
261 }
262
263 /**
264 * {@inheritDoc}
265 *
266 * @see org.xml.sax.helpers.DefaultHandler#endPrefixMapping(java.lang.String)
267 */
268 @Override
269 public void endPrefixMapping( String prefix ) {
270 assert prefix != null;
271 // Get the current URI for this prefix ...
272 String uri = namespaceRegistry.getNamespaceForPrefix(prefix);
273 assert uri != null;
274
275 // Get the previous prefix from the stack ...
276 LinkedList<String> prefixStack = this.prefixStackByUri.get(uri);
277 assert prefixStack != null;
278 assert !prefixStack.isEmpty();
279 String existingPrefix = prefixStack.removeFirst();
280 assert prefix.equals(existingPrefix);
281
282 // If there are no previous prefixes, then remove the mapping ...
283 if (prefixStack.isEmpty()) {
284 namespaceRegistry.unregister(uri);
285 prefixStackByUri.remove(uri);
286 } else {
287 String previous = prefixStack.getFirst();
288 namespaceRegistry.register(previous, uri);
289 }
290 }
291
292 /**
293 * {@inheritDoc}
294 *
295 * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String,
296 * org.xml.sax.Attributes)
297 */
298 @Override
299 public void startElement( String uri,
300 String localName,
301 String name,
302 Attributes attributes ) {
303 // Should this (root) element be skipped?
304 if (skipFirstElement) {
305 skipFirstElement = false;
306 return;
307 }
308 assert localName != null;
309 Name nodeName = null;
310
311 properties.clear();
312 Object typePropertyValue = null;
313 // Convert each of the attributes to a property ...
314 for (int i = 0, len = attributes.getLength(); i != len; ++i) {
315 String attributeLocalName = attributes.getLocalName(i);
316 String attributeUri = attributes.getURI(i);
317 Name attributeName = null;
318 if ((attributeUri == null || attributeUri.length() == 0) && attributes.getQName(i).indexOf(':') == -1) {
319 switch (this.attributeScoping) {
320 case INHERIT_ELEMENT_NAMESPACE:
321 attributeName = nameFactory.create(uri, attributeLocalName, decoder);
322 break;
323 case USE_DEFAULT_NAMESPACE:
324 attributeName = nameFactory.create(attributeLocalName, decoder);
325 break;
326 }
327 } else {
328 attributeName = nameFactory.create(attributeUri, attributeLocalName, decoder);
329 }
330 assert attributeName != null;
331 // Check to see if this is an attribute that represents the node name (which may be null) ...
332 if (nodeName == null && attributeName.equals(nameAttribute)) {
333 nodeName = nameFactory.create(attributes.getValue(i)); // don't use a decoder
334 continue;
335 }
336 if (typePropertyValue == null && attributeName.equals(typeAttribute)) {
337 typePropertyValue = nameFactory.create(attributes.getValue(i)); // don't use a decoder
338 continue;
339 }
340 // Create a property for this attribute ...
341 Property property = createProperty(attributeName, attributes.getValue(i));
342 properties.add(property);
343 }
344 // Create the node name if required ...
345 if (nodeName == null) {
346 // No attribute defines the node name ...
347 nodeName = nameFactory.create(uri, localName, decoder);
348 } else {
349 typePropertyValue = nameFactory.create(uri, localName, decoder);
350 }
351 if (typeAttribute != null) {
352 // A attribute defines the node name. Set the type property, if required
353 if (typePropertyValue == null) typePropertyValue = typeAttributeValue;
354 if (typePropertyValue != null) {
355 propertyValues[0] = typePropertyValue;
356 Property property = propertyFactory.create(typeAttribute, propertyValues);
357 properties.add(property);
358 }
359 }
360 // Update the current path ...
361 currentPath = pathFactory.create(currentPath, nodeName);
362 // Create the node, and note that we don't care about same-name siblings (as the graph will correct them) ...
363 destination.create(currentPath, properties);
364 }
365
366 /**
367 * {@inheritDoc}
368 *
369 * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
370 */
371 @Override
372 public void endElement( String uri,
373 String localName,
374 String name ) {
375 // Nothing to do but to change the current path to be the parent ...
376 currentPath = currentPath.getParent();
377 }
378
379 /**
380 * {@inheritDoc}
381 *
382 * @see org.xml.sax.helpers.DefaultHandler#endDocument()
383 */
384 @Override
385 public void endDocument() {
386 // Submit any outstanding requests (if there are any) ...
387 destination.submit();
388 }
389
390 /**
391 * Create a property with the given name and value, obtained from an attribute name and value in the XML content.
392 * <p>
393 * By default, this method creates a property by directly using the value as the sole value of the property.
394 * </p>
395 *
396 * @param propertyName the name of the property; never null
397 * @param value the attribute value
398 * @return the property; may not be null
399 */
400 protected Property createProperty( Name propertyName,
401 Object value ) {
402 propertyValues[0] = value;
403 Property result = propertyFactory.create(propertyName, propertyValues);
404 return result;
405 }
406
407 /**
408 * Interface used internally as the destination for the requests. This is used to abstract whether the requests should be
409 * submitted immediately or in a single batch.
410 *
411 * @author Randall Hauch
412 */
413 @NotThreadSafe
414 public static interface Destination {
415
416 /**
417 * Obtain the execution context of the destination.
418 *
419 * @return the destination's execution context
420 */
421 public ExecutionContext getExecutionContext();
422
423 /**
424 * Create a node at the supplied path and with the supplied attributes. The path will be absolute.
425 *
426 * @param path the absolute path of the node
427 * @param properties the properties for the node; never null, but may be empty if there are no properties
428 */
429 public void create( Path path,
430 List<Property> properties );
431
432 /**
433 * Create a node at the supplied path and with the supplied attributes. The path will be absolute.
434 *
435 * @param path the absolute path of the node
436 * @param firstProperty the first property
437 * @param additionalProperties the remaining properties for the node
438 */
439 public void create( Path path,
440 Property firstProperty,
441 Property... additionalProperties );
442
443 /**
444 * Signal to this destination that any enqueued create requests should be submitted. Usually this happens at the end of
445 * the document parsing, but an implementer must allow for it to be called multiple times and anytime during parsing.
446 */
447 public void submit();
448 }
449 }