001 /*
002 * JBoss DNA (http://www.jboss.org/dna)
003 * See the COPYRIGHT.txt file distributed with this work for information
004 * regarding copyright ownership. Some portions may be licensed
005 * to Red Hat, Inc. under one or more contributor license agreements.
006 * See the AUTHORS.txt file in the distribution for a full listing of
007 * individual contributors.
008 *
009 * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
010 * is licensed to you under the terms of the GNU Lesser General Public License as
011 * published by the Free Software Foundation; either version 2.1 of
012 * the License, or (at your option) any later version.
013 *
014 * JBoss DNA is distributed in the hope that it will be useful,
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
017 * Lesser General Public License for more details.
018 *
019 * You should have received a copy of the GNU Lesser General Public
020 * License along with this software; if not, write to the Free
021 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
022 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
023 */
024 package org.jboss.dna.graph.xml;
025
026 import java.util.ArrayList;
027 import java.util.Arrays;
028 import java.util.Collection;
029 import java.util.Collections;
030 import java.util.HashMap;
031 import java.util.LinkedList;
032 import java.util.List;
033 import java.util.Map;
034 import javax.xml.parsers.SAXParser;
035 import net.jcip.annotations.NotThreadSafe;
036 import org.jboss.dna.common.text.TextDecoder;
037 import org.jboss.dna.common.text.XmlNameEncoder;
038 import org.jboss.dna.common.util.CheckArg;
039 import org.jboss.dna.graph.ExecutionContext;
040 import org.jboss.dna.graph.io.Destination;
041 import org.jboss.dna.graph.property.Name;
042 import org.jboss.dna.graph.property.NameFactory;
043 import org.jboss.dna.graph.property.NamespaceRegistry;
044 import org.jboss.dna.graph.property.Path;
045 import org.jboss.dna.graph.property.PathFactory;
046 import org.jboss.dna.graph.property.Property;
047 import org.jboss.dna.graph.property.PropertyFactory;
048 import org.jboss.dna.graph.property.basic.LocalNamespaceRegistry;
049 import org.xml.sax.Attributes;
050 import org.xml.sax.ext.DefaultHandler2;
051 import com.google.common.collect.LinkedHashMultimap;
052 import com.google.common.collect.Multimap;
053
054 /**
055 * A {@link DefaultHandler2} specialization that responds to XML content events by creating the corresponding content in the
056 * supplied graph. This implementation ignores DTD entities, XML contents, and other XML processing instructions. If other
057 * behavior is required, the appropriate methods can be overridden. (Which is why this class extends <code>DefaultHandler2</code>,
058 * which has support for processing all the different parts of XML.
059 * <p>
060 * This class can be passed to the {@link SAXParser}'s {@link SAXParser#parse(java.io.File, org.xml.sax.helpers.DefaultHandler)
061 * parse(..,DefaultHandler)} methods.
062 * </p>
063 *
064 * @author Randall Hauch
065 */
066 @NotThreadSafe
067 public class XmlHandler extends DefaultHandler2 {
068
069 /**
070 * The choices for how attributes that have no namespace prefix should be assigned a namespace.
071 *
072 * @author Randall Hauch
073 */
074 public enum AttributeScoping {
075 /** The attribute's namespace is the default namespace */
076 USE_DEFAULT_NAMESPACE,
077 /** The attribute's namespace is the same namespace as the containing element */
078 INHERIT_ELEMENT_NAMESPACE;
079 }
080
081 private final ExecutionContext context;
082
083 /**
084 * Decoder for XML names, to turn '_xHHHH_' sequences in the XML element and attribute names into the corresponding UTF-16
085 * characters.
086 */
087 public static TextDecoder DEFAULT_DECODER = new XmlNameEncoder();
088
089 /**
090 * The default {@link AttributeScoping}.
091 */
092 public static AttributeScoping DEFAULT_ATTRIBUTE_SCOPING = AttributeScoping.USE_DEFAULT_NAMESPACE;
093
094 /**
095 * The destination where the content should be sent.
096 */
097 protected final Destination destination;
098
099 /**
100 * The name of the XML attribute whose value should be used for the name of the node. For example, "jcr:name".
101 */
102 protected final Name nameAttribute;
103
104 /**
105 * The name of the property that is to be set with the type of the XML element. For example, "jcr:name".
106 */
107 protected final Name typeAttribute;
108
109 /**
110 * The value of the node type property, if the node's name is set with the {@link #nameAttribute}.
111 */
112 protected final Name typeAttributeValue;
113
114 /**
115 * The cached reference to the graph's path factory.
116 */
117 protected final PathFactory pathFactory;
118
119 /**
120 * The cached reference to the graph's name factory.
121 */
122 protected final NameFactory nameFactory;
123
124 /**
125 * The cached reference to the graph's property factory.
126 */
127 protected final PropertyFactory propertyFactory;
128
129 /**
130 * The cached reference to the graph's namespace registry.
131 */
132 protected final NamespaceRegistry namespaceRegistry;
133
134 /**
135 * The TextDecoder that is used to decode the names.
136 */
137 protected final TextDecoder decoder;
138
139 /**
140 * The stack of prefixes for each namespace, which is used to keep the {@link #namespaceRegistry local namespace registry} in
141 * sync with the namespaces in the XML document.
142 */
143 private final Map<String, LinkedList<String>> prefixStackByUri = new HashMap<String, LinkedList<String>>();
144
145 private final AttributeScoping attributeScoping;
146
147 /**
148 * The path for the node representing the current element. This starts out as the path supplied by the constructor, and never
149 * is shorter than that initial path.
150 */
151 protected Path currentPath;
152
153 /**
154 * Flag the records whether the first element should be skipped.
155 */
156 protected boolean skipFirstElement;
157
158 /**
159 * A temporary list used to store the properties for a single node. This is cleared, populated, then used to create the node.
160 */
161 protected final List<Property> properties = new ArrayList<Property>();
162
163 /**
164 * A working array that contains a single value object that is used to create Property objects (without having to create an
165 * array of values for each property).
166 */
167 protected final Object[] propertyValues = new Object[1];
168
169 /**
170 * Character buffer to aggregate nested character data
171 *
172 * @see ElementEntry
173 */
174 private StringBuilder characterDataBuffer = new StringBuilder();
175
176 /**
177 * Stack of pending {@link ElementEntry element entries} from the root of the imported content to the current node.
178 *
179 * @see ElementEntry
180 */
181 private final LinkedList<ElementEntry> elementStack = new LinkedList<ElementEntry>();
182
183 /**
184 * Create a handler that creates content in the supplied graph
185 *
186 * @param destination the destination where the content should be sent.graph in which the content should be placed
187 * @param skipRootElement true if the root element of the document should be skipped, or false if the root element should be
188 * converted to the top-level node of the content
189 * @param parent the path to the node in the graph under which the content should be placed; if null, the root node is assumed
190 * @param textDecoder the text decoder that should be used to decode the XML element names and XML attribute names, prior to
191 * using those values to create names; or null if the default encoder should be used
192 * @param nameAttribute the name of the property whose value should be used for the names of the nodes (typically, this is
193 * "jcr:name" or something equivalent); or null if the XML element name should always be used as the node name
194 * @param typeAttribute the name of the property that should be set with the type of the XML element, or null if there is no
195 * such property
196 * @param typeAttributeValue the value of the type property that should be used if the node has no <code>nameAttribute</code>,
197 * or null if the value should be set to the type of the XML element
198 * @param scoping defines how to choose the namespace of attributes that do not have a namespace prefix; if null, the
199 * {@link #DEFAULT_ATTRIBUTE_SCOPING} value is used
200 * @throws IllegalArgumentException if the destination reference is null
201 */
202 public XmlHandler( Destination destination,
203 boolean skipRootElement,
204 Path parent,
205 TextDecoder textDecoder,
206 Name nameAttribute,
207 Name typeAttribute,
208 Name typeAttributeValue,
209 AttributeScoping scoping ) {
210 CheckArg.isNotNull(destination, "destination");
211 assert destination != null;
212 this.destination = destination;
213 this.nameAttribute = nameAttribute;
214 this.typeAttribute = typeAttribute;
215 this.typeAttributeValue = typeAttributeValue;
216 this.decoder = textDecoder != null ? textDecoder : DEFAULT_DECODER;
217 this.skipFirstElement = skipRootElement;
218 this.attributeScoping = scoping != null ? scoping : DEFAULT_ATTRIBUTE_SCOPING;
219
220 // Use the execution context ...
221 this.context = destination.getExecutionContext();
222 assert this.context != null;
223
224 // Set up a local namespace registry that is kept in sync with the namespaces found in this XML document ...
225 NamespaceRegistry namespaceRegistry = new LocalNamespaceRegistry(this.context.getNamespaceRegistry());
226 final ExecutionContext localContext = this.context.with(namespaceRegistry);
227
228 // Set up references to frequently-used objects in the context ...
229 this.nameFactory = localContext.getValueFactories().getNameFactory();
230 this.pathFactory = localContext.getValueFactories().getPathFactory();
231 this.propertyFactory = localContext.getPropertyFactory();
232 this.namespaceRegistry = localContext.getNamespaceRegistry();
233 assert this.nameFactory != null;
234 assert this.pathFactory != null;
235 assert this.propertyFactory != null;
236 assert this.namespaceRegistry != null;
237
238 // Set up the initial path ...
239 this.currentPath = parent != null ? parent : this.pathFactory.createRootPath();
240 assert this.currentPath != null;
241 }
242
243 /**
244 * {@inheritDoc}
245 * <p>
246 * This method ensures that the namespace is registered with the {@link NamespaceRegistry registry}, using the supplied prefix
247 * to register the namespace if required. Note that because this class does not really use the namespace prefixes to create
248 * {@link Name} objects, no attempt is made to match the XML namespace prefixes.
249 * </p>
250 *
251 * @see org.xml.sax.helpers.DefaultHandler#startPrefixMapping(java.lang.String, java.lang.String)
252 */
253 @Override
254 public void startPrefixMapping( String prefix,
255 String uri ) {
256 assert uri != null;
257 // Add the prefix to the stack ...
258 LinkedList<String> prefixStack = this.prefixStackByUri.get(uri);
259 if (prefixStack == null) {
260 prefixStack = new LinkedList<String>();
261 this.prefixStackByUri.put(uri, prefixStack);
262 }
263 prefixStack.addFirst(prefix);
264
265 // If the namespace is already registered, then we'll have to register it in the context's registry, too.
266 if (!namespaceRegistry.isRegisteredNamespaceUri(uri)) {
267 // The namespace is not already registered (locally or in the context's registry), so we have to
268 // register it with the context's registry (which the local register then inherits).
269 NamespaceRegistry contextRegistry = context.getNamespaceRegistry();
270 if (contextRegistry.getNamespaceForPrefix(prefix) != null) {
271 // The prefix is already bound, so register and generate a unique prefix
272 context.getNamespaceRegistry().getPrefixForNamespaceUri(uri, true);
273 // Now register locally with the supplied prefix ...
274 namespaceRegistry.register(prefix, uri);
275 } else {
276 context.getNamespaceRegistry().register(prefix, uri);
277 }
278 } else {
279 // It is already registered, but re-register it locally using the supplied prefix ...
280 namespaceRegistry.register(prefix, uri);
281 }
282 }
283
284 /**
285 * {@inheritDoc}
286 *
287 * @see org.xml.sax.helpers.DefaultHandler#endPrefixMapping(java.lang.String)
288 */
289 @Override
290 public void endPrefixMapping( String prefix ) {
291 assert prefix != null;
292 // Get the current URI for this prefix ...
293 String uri = namespaceRegistry.getNamespaceForPrefix(prefix);
294 assert uri != null;
295
296 // Get the previous prefix from the stack ...
297 LinkedList<String> prefixStack = this.prefixStackByUri.get(uri);
298 assert prefixStack != null;
299 assert !prefixStack.isEmpty();
300 String existingPrefix = prefixStack.removeFirst();
301 assert prefix.equals(existingPrefix);
302
303 // If there are no previous prefixes, then remove the mapping ...
304 if (prefixStack.isEmpty()) {
305 namespaceRegistry.unregister(uri);
306 prefixStackByUri.remove(uri);
307 } else {
308 String previous = prefixStack.getFirst();
309 namespaceRegistry.register(previous, uri);
310 }
311 }
312
313 /**
314 * {@inheritDoc}
315 *
316 * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String,
317 * org.xml.sax.Attributes)
318 */
319 @Override
320 public void startElement( String uri,
321 String localName,
322 String name,
323 Attributes attributes ) {
324 // Should this (root) element be skipped?
325 if (skipFirstElement) {
326 skipFirstElement = false;
327 return;
328 }
329 assert localName != null;
330 Name nodeName = null;
331
332 ElementEntry element;
333 if (!elementStack.isEmpty()) {
334 // Add the parent
335 elementStack.peek().addAsNode();
336 element = new ElementEntry(elementStack.peek(), currentPath, null);
337 } else {
338 element = new ElementEntry(null, currentPath, null);
339 }
340 elementStack.addFirst(element);
341
342 properties.clear();
343 Object typePropertyValue = null;
344 // Convert each of the attributes to a property ...
345 for (int i = 0, len = attributes.getLength(); i != len; ++i) {
346 String attributeLocalName = attributes.getLocalName(i);
347 String attributeUri = attributes.getURI(i);
348 Name attributeName = null;
349 if ((attributeUri == null || attributeUri.length() == 0) && attributes.getQName(i).indexOf(':') == -1) {
350 switch (this.attributeScoping) {
351 case INHERIT_ELEMENT_NAMESPACE:
352 attributeName = nameFactory.create(uri, attributeLocalName, decoder);
353 break;
354 case USE_DEFAULT_NAMESPACE:
355 attributeName = nameFactory.create(attributeLocalName, decoder);
356 break;
357 }
358 } else {
359 attributeName = nameFactory.create(attributeUri, attributeLocalName, decoder);
360 }
361 assert attributeName != null;
362 // Check to see if this is an attribute that represents the node name (which may be null) ...
363 if (nodeName == null && attributeName.equals(nameAttribute)) {
364 nodeName = nameFactory.create(attributes.getValue(i)); // don't use a decoder
365 element.setName(nodeName);
366 continue;
367 }
368 if (typePropertyValue == null && attributeName.equals(typeAttribute)) {
369 typePropertyValue = nameFactory.create(attributes.getValue(i)); // don't use a decoder
370 continue;
371 }
372 // Create a property for this attribute ...
373 element.addProperty(attributeName, attributes.getValue(i));
374 }
375 // Create the node name if required ...
376 if (nodeName == null) {
377 // No attribute defines the node name ...
378 nodeName = nameFactory.create(uri, localName, decoder);
379 element.setName(nodeName);
380 } else {
381 if (typePropertyValue == null) typePropertyValue = nameFactory.create(uri, localName, decoder);
382 }
383 if (typeAttribute != null) {
384 // A attribute defines the node name. Set the type property, if required
385 if (typePropertyValue == null) typePropertyValue = typeAttributeValue;
386 if (typePropertyValue != null) {
387 element.addProperty(typeAttribute, typePropertyValue);
388 }
389 }
390
391 // Update the current path ...
392 currentPath = element.path();
393 }
394
395 /**
396 * {@inheritDoc}
397 *
398 * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
399 */
400 @Override
401 public void endElement( String uri,
402 String localName,
403 String name ) {
404
405 String s = characterDataBuffer.toString().trim();
406 if (s.length() > 0) {
407 elementStack.removeFirst().addAsPropertySetTo(s);
408 } else if (!elementStack.isEmpty()) {
409 elementStack.removeFirst().submit();
410 }
411 characterDataBuffer = new StringBuilder();
412
413 // Nothing to do but to change the current path to be the parent ...
414 currentPath = currentPath.getParent();
415 }
416
417 /**
418 * {@inheritDoc}
419 *
420 * @see org.xml.sax.helpers.DefaultHandler#characters(char[], int, int)
421 */
422 @Override
423 public void characters( char[] ch,
424 int start,
425 int length ) {
426 // Have to add this to a buffer as one logical set of character data can cause this method to fire multiple times
427 characterDataBuffer.append(ch, start, length);
428 }
429
430 /**
431 * {@inheritDoc}
432 *
433 * @see org.xml.sax.helpers.DefaultHandler#endDocument()
434 */
435 @Override
436 public void endDocument() {
437 // Submit any outstanding requests (if there are any) ...
438 destination.submit();
439 }
440
441 /**
442 * Create a property with the given name and value, obtained from an attribute name and value in the XML content.
443 * <p>
444 * By default, this method creates a property by directly using the value as the sole value of the property.
445 * </p>
446 *
447 * @param propertyName the name of the property; never null
448 * @param value the attribute value
449 * @return the property; may not be null
450 */
451 protected Property createProperty( Name propertyName,
452 Object value ) {
453 propertyValues[0] = value;
454 return propertyFactory.create(propertyName, propertyValues);
455 }
456
457 /**
458 * Create a property with the given name and values, obtained from an attribute name and value in the XML content.
459 * <p>
460 * By default, this method creates a property by directly using the values as the values of the property.
461 * </p>
462 *
463 * @param propertyName the name of the property; never null
464 * @param values the attribute values
465 * @return the property; may not be null
466 */
467 protected Property createProperty( Name propertyName,
468 Collection<Object> values ) {
469 return propertyFactory.create(propertyName, values);
470 }
471
472 /**
473 * Possible states for an {@link ElementEntry} instance. All element entries start in state {@code TBD} and then transition to
474 * one of the terminating states, {@code NODE} or {@code PROPERTY} when {@link ElementEntry#addAsNode()} or
475 * {@link ElementEntry#addAsPropertySetTo(Object)} is invoked.
476 */
477 protected enum ElementEntryState {
478 NODE,
479 PROPERTY,
480 TBD
481 }
482
483 /**
484 * Element entries hold references to the data of "pending" elements. "Pending" elements are elements which have been
485 * encountered through a {@link XmlHandler#startElement(String, String, String, Attributes)} event but have not yet been fully
486 * committed to the {@link XmlHandler#destination}.
487 * <p>
488 * As the current import semantics allow elements with nested character data to be imported as properties, it is not always
489 * possible to determine whether the element represents a node or a property from within the {@code startElement} method.
490 * Therefore, {@code ElementEntries} are initially created in an {@link ElementEntryState#TBD unknown state} and submitted to
491 * the {@code destination} when it can be positively determined that the entry represents a property (if nested character data
492 * is encountered) or a node (if a child node is detected or the {@link XmlHandler#endElement(String, String, String)} method
493 * is invoked prior to encountering nested character data).
494 * </p>
495 * <p>
496 * As DNA does not currently support a way to add a value to an existing property through the Graph API, {@code
497 * ElementEntries} also contain a {@link Multimap} of property names to values. The node's properties are aggregated and only
498 * submitted to the {@code destination} when the {@link XmlHandler#endElement(String, String, String)} event fires.
499 * </p>
500 */
501 private class ElementEntry {
502
503 private ElementEntry parent;
504 // Stored separately since the root node has no parent ElementEntry but does have a path
505 private Path pathToParent;
506 private Path pathToThisNode;
507 private Name name;
508 private Multimap<Name, Object> properties;
509 private ElementEntryState state;
510
511 protected ElementEntry( ElementEntry parent,
512 Path pathToParent,
513 Name name ) {
514 this.parent = parent;
515 this.pathToParent = pathToParent;
516 this.name = name;
517 this.state = ElementEntryState.TBD;
518 properties = new LinkedHashMultimap<Name, Object>();
519 }
520
521 protected void setName( Name name ) {
522 this.name = name;
523 pathToThisNode = pathFactory.create(pathToParent, name);
524 }
525
526 protected void addProperty( Name propertyName,
527 Object propertyValue ) {
528 assert state != ElementEntryState.PROPERTY;
529 properties.put(propertyName, propertyValue);
530 }
531
532 protected void addAsNode() {
533 assert state != ElementEntryState.PROPERTY;
534 if (state == ElementEntryState.NODE) return;
535
536 state = ElementEntryState.NODE;
537 destination.create(pathFactory.create(pathToParent, name), Collections.<Property>emptyList());
538 }
539
540 protected void addAsPropertySetTo( Object value ) {
541 assert state != ElementEntryState.NODE;
542 state = ElementEntryState.PROPERTY;
543 parent.addProperty(name, value);
544 }
545
546 protected final Path path() {
547 return pathToThisNode;
548 }
549
550 protected void submit() {
551 if (state == ElementEntryState.PROPERTY) return;
552
553 if (state == ElementEntryState.NODE && properties.size() == 0) return;
554 Property[] propertiesToAdd = new Property[properties.size()];
555 int i = 0;
556 for (Name name : properties.keySet()) {
557 propertiesToAdd[i++] = createProperty(name, properties.get(name));
558 }
559
560 if (state == ElementEntryState.TBD) {
561 // Merge the add and the create
562 destination.create(pathToThisNode, Arrays.asList(propertiesToAdd));
563 } else {
564 destination.setProperties(pathToThisNode, propertiesToAdd);
565 }
566 }
567 }
568 }