001 /*
002 * JBoss DNA (http://www.jboss.org/dna)
003 * See the COPYRIGHT.txt file distributed with this work for information
004 * regarding copyright ownership. Some portions may be licensed
005 * to Red Hat, Inc. under one or more contributor license agreements.
006 * See the AUTHORS.txt file in the distribution for a full listing of
007 * individual contributors.
008 *
009 * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
010 * is licensed to you under the terms of the GNU Lesser General Public License as
011 * published by the Free Software Foundation; either version 2.1 of
012 * the License, or (at your option) any later version.
013 *
014 * JBoss DNA is distributed in the hope that it will be useful,
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
017 * Lesser General Public License for more details.
018 *
019 * You should have received a copy of the GNU Lesser General Public
020 * License along with this software; if not, write to the Free
021 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
022 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
023 */
024 package org.jboss.dna.sequencer.xml;
025
026 import java.util.ArrayList;
027 import java.util.HashMap;
028 import java.util.LinkedList;
029 import java.util.List;
030 import java.util.Map;
031 import org.jboss.dna.common.collection.Problems;
032 import org.jboss.dna.common.text.TextDecoder;
033 import org.jboss.dna.common.text.XmlNameEncoder;
034 import org.jboss.dna.common.util.CheckArg;
035 import org.jboss.dna.common.util.StringUtil;
036 import org.jboss.dna.graph.ExecutionContext;
037 import org.jboss.dna.graph.JcrLexicon;
038 import org.jboss.dna.graph.property.Name;
039 import org.jboss.dna.graph.property.NameFactory;
040 import org.jboss.dna.graph.property.NamespaceRegistry;
041 import org.jboss.dna.graph.property.Path;
042 import org.jboss.dna.graph.property.PathFactory;
043 import org.jboss.dna.graph.property.PropertyFactory;
044 import org.jboss.dna.graph.property.ValueFormatException;
045 import org.jboss.dna.graph.property.basic.LocalNamespaceRegistry;
046 import org.jboss.dna.graph.sequencer.StreamSequencerContext;
047 import org.jboss.dna.graph.sequencer.SequencerOutput;
048 import org.xml.sax.Attributes;
049 import org.xml.sax.SAXParseException;
050 import org.xml.sax.ext.DefaultHandler2;
051
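/**
 * A SAX handler that records the structure and content of an XML document (elements, attributes, text content, CDATA
 * sections, comments, processing instructions and DTD entity declarations) as properties written to a
 * {@link SequencerOutput}.
 *
 * @author Randall Hauch
 */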
055 public class XmlSequencerHandler extends DefaultHandler2 {
056
057 private final SequencerOutput output;
058 private final StreamSequencerContext context;
059
060 /**
061 * Decoder for XML names, to turn '_xHHHH_' sequences in the XML element and attribute names into the corresponding UTF-16
062 * characters.
063 */
064 public static TextDecoder DEFAULT_DECODER = new XmlNameEncoder();
065
066 /**
067 * The default {@link XmlSequencer.AttributeScoping}.
068 */
069 public static XmlSequencer.AttributeScoping DEFAULT_ATTRIBUTE_SCOPING = XmlSequencer.AttributeScoping.USE_DEFAULT_NAMESPACE;
070
071 /**
072 * The name of the attribute that should be used for the node name.
073 */
074 protected final Name nameAttribute;
075
076 /**
077 * The default primary type.
078 */
079 protected final Name defaultPrimaryType;
080
081 /**
082 * The cached reference to the graph's path factory.
083 */
084 protected final PathFactory pathFactory;
085
086 /**
087 * The cached reference to the graph's name factory.
088 */
089 protected final NameFactory nameFactory;
090
091 /**
092 * The cached reference to the graph's property factory.
093 */
094 protected final PropertyFactory propertyFactory;
095
096 /**
097 * The cached reference to the graph's namespace registry.
098 */
099 protected final NamespaceRegistry namespaceRegistry;
100
101 /**
102 * The TextDecoder that is used to decode the names.
103 */
104 protected final TextDecoder decoder;
105
106 /**
107 * The stack of prefixes for each namespace, which is used to keep the {@link #namespaceRegistry local namespace registry} in
108 * sync with the namespaces in the XML document.
109 */
110 private final Map<String, LinkedList<String>> prefixStackByUri = new HashMap<String, LinkedList<String>>();
111
112 private final XmlSequencer.AttributeScoping attributeScoping;
113
114 /**
115 * The path for the node representing the current element. This starts out as the path supplied by the constructor, and never
116 * is shorter than that initial path.
117 */
118 protected Path currentPath;
119
120 // Recursive map used to track the number of occurrences of names for elements under a particular path
121 private Map<Name, List<IndexedName>> nameToIndexedNamesMap = new HashMap<Name, List<IndexedName>>();
122
123 // The stack of recursive maps being processed, with the head entry being the map for the current path
124 private final LinkedList<Map<Name, List<IndexedName>>> nameToIndexedNamesMapStack = new LinkedList<Map<Name, List<IndexedName>>>();
125
126 private String currentEntityName;
127 private StringBuilder cDataContent;
128 private StringBuilder contentBuilder;
129 private final Problems problems;
130 private final Map<String, String> entityValues = new HashMap<String, String>();
131
132 /**
133 * @param output
134 * @param context
135 * @param nameAttribute
136 * @param defaultPrimaryType
137 * @param textDecoder
138 * @param scoping
139 */
140 XmlSequencerHandler( SequencerOutput output,
141 StreamSequencerContext context,
142 Name nameAttribute,
143 Name defaultPrimaryType,
144 TextDecoder textDecoder,
145 XmlSequencer.AttributeScoping scoping ) {
146 CheckArg.isNotNull(output, "output");
147 CheckArg.isNotNull(context, "context");
148
149 // Use the execution context ...
150 this.output = output;
151 this.context = context;
152 this.problems = context.getProblems();
153 assert this.problems != null;
154
155 this.nameAttribute = nameAttribute;
156 this.defaultPrimaryType = defaultPrimaryType;
157 this.decoder = textDecoder != null ? textDecoder : DEFAULT_DECODER;
158 this.attributeScoping = scoping != null ? scoping : DEFAULT_ATTRIBUTE_SCOPING;
159
160 // Set up a local namespace registry that is kept in sync with the namespaces found in this XML document ...
161 NamespaceRegistry namespaceRegistry = new LocalNamespaceRegistry(this.context.getNamespaceRegistry());
162 final ExecutionContext localContext = this.context.with(namespaceRegistry);
163
164 // Set up references to frequently-used objects in the context ...
165 this.nameFactory = localContext.getValueFactories().getNameFactory();
166 this.pathFactory = localContext.getValueFactories().getPathFactory();
167 this.propertyFactory = localContext.getPropertyFactory();
168 this.namespaceRegistry = localContext.getNamespaceRegistry();
169 assert this.nameFactory != null;
170 assert this.pathFactory != null;
171 assert this.propertyFactory != null;
172 assert this.namespaceRegistry != null;
173
174 // Set up the initial path ...
175 this.currentPath = this.pathFactory.createRelativePath();
176 assert this.currentPath != null;
177 }
178
179 private void startNode( Name name ) {
180 // Check if content still needs to be output
181 if (contentBuilder != null) endContent();
182 // Add name to list of indexed names for this element to ensure we use the correct index (which is the size of the
183 // list)
184 List<IndexedName> indexedNames = nameToIndexedNamesMap.get(name);
185 if (indexedNames == null) {
186 indexedNames = new ArrayList<IndexedName>();
187 nameToIndexedNamesMap.put(name, indexedNames);
188 }
189 IndexedName indexedName = new IndexedName();
190 indexedNames.add(indexedName);
191 // Add element name and the appropriate index to the path.
192 // Per the JCR spec, the index must be relative to same-name sibling nodes
193 currentPath = pathFactory.create(currentPath, name, indexedNames.size()).getNormalizedPath();
194 // currentPath = currentPath.getNormalizedPath();
195 // Add the indexed name map to the stack and set the current map to the new element's map
196 nameToIndexedNamesMapStack.addFirst(nameToIndexedNamesMap);
197 nameToIndexedNamesMap = indexedName.nameToIndexedNamesMap;
198 }
199
200 private void endNode() {
201 // Recover parent's path, namespace, and indexedName map, clearing the ended element's map to free memory
202 currentPath = currentPath.getParent();
203 currentPath = currentPath.getNormalizedPath();
204 nameToIndexedNamesMap.clear();
205 nameToIndexedNamesMap = nameToIndexedNamesMapStack.removeFirst();
206 }
207
208 /**
209 * See if there is any element content that needs to be completed.
210 */
211 protected void endContent() {
212 // Process the content of the element ...
213 String content = StringUtil.normalize(contentBuilder.toString());
214 // Null-out builder to setup for subsequent content.
215 // Must be done before call to startElement below to prevent infinite loop.
216 contentBuilder = null;
217 // Skip if nothing in content but whitespace
218 if (content.length() > 0) {
219 // Create separate node for each content entry since entries can be interspersed amongst child elements
220 startNode(DnaXmlLexicon.ELEMENT_CONTENT);
221 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, DnaXmlLexicon.ELEMENT_CONTENT);
222 output.setProperty(currentPath, DnaXmlLexicon.ELEMENT_CONTENT, content);
223 endNode();
224 }
225 }
226
227 /**
228 * <p>
229 * {@inheritDoc}
230 * </p>
231 *
232 * @see org.xml.sax.helpers.DefaultHandler#startDocument()
233 */
234 @Override
235 public void startDocument() {
236 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, DnaXmlLexicon.DOCUMENT);
237 }
238
239 /**
240 * <p>
241 * {@inheritDoc}
242 * </p>
243 *
244 * @see org.xml.sax.ext.DefaultHandler2#startDTD(java.lang.String, java.lang.String, java.lang.String)
245 */
246 @Override
247 public void startDTD( String name,
248 String publicId,
249 String systemId ) {
250 output.setProperty(currentPath, DnaDtdLexicon.NAME, name);
251 output.setProperty(currentPath, DnaDtdLexicon.PUBLIC_ID, publicId);
252 output.setProperty(currentPath, DnaDtdLexicon.SYSTEM_ID, systemId);
253 }
254
255 /**
256 * <p>
257 * {@inheritDoc}
258 * </p>
259 *
260 * @see org.xml.sax.ext.DefaultHandler2#externalEntityDecl(java.lang.String, java.lang.String, java.lang.String)
261 */
262 @Override
263 public void externalEntityDecl( String name,
264 String publicId,
265 String systemId ) {
266 // Add "synthetic" entity container to path to help prevent name collisions with XML elements
267 startNode(DnaDtdLexicon.ENTITY);
268 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, DnaDtdLexicon.ENTITY);
269 output.setProperty(currentPath, DnaDtdLexicon.NAME, name);
270 if (publicId != null) output.setProperty(currentPath, DnaDtdLexicon.PUBLIC_ID, publicId);
271 if (systemId != null) output.setProperty(currentPath, DnaDtdLexicon.SYSTEM_ID, systemId);
272 endNode();
273 }
274
275 /**
276 * {@inheritDoc}
277 *
278 * @see org.xml.sax.ext.DefaultHandler2#internalEntityDecl(java.lang.String, java.lang.String)
279 */
280 @Override
281 public void internalEntityDecl( String name,
282 String value ) {
283 // Add "synthetic" entity container to path to help prevent name collisions with XML elements
284 startNode(DnaDtdLexicon.ENTITY);
285 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, DnaDtdLexicon.ENTITY);
286 output.setProperty(currentPath, DnaDtdLexicon.NAME, name);
287 output.setProperty(currentPath, DnaDtdLexicon.VALUE, value);
288 // Record the name/value pair ...
289 entityValues.put(name, value);
290 endNode();
291 }
292
293 /**
294 * <p>
295 * {@inheritDoc}
296 * </p>
297 *
298 * @see org.xml.sax.helpers.DefaultHandler#processingInstruction(java.lang.String, java.lang.String)
299 */
300 @Override
301 public void processingInstruction( String target,
302 String data ) {
303 // Output separate nodes for each instruction since multiple are allowed
304 startNode(DnaXmlLexicon.PROCESSING_INSTRUCTION);
305 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, DnaXmlLexicon.PROCESSING_INSTRUCTION);
306 output.setProperty(currentPath, DnaXmlLexicon.TARGET, target.trim());
307 if (data != null) {
308 output.setProperty(currentPath, DnaXmlLexicon.PROCESSING_INSTRUCTION_CONTENT, data.trim());
309 }
310 endNode();
311 }
312
313 /**
314 * {@inheritDoc}
315 * <p>
316 * This method ensures that the namespace is registered with the {@link NamespaceRegistry registry}, using the supplied prefix
317 * to register the namespace if required. Note that because this class does not really use the namespace prefixes to create
318 * {@link Name} objects, no attempt is made to match the XML namespace prefixes.
319 * </p>
320 *
321 * @see org.xml.sax.helpers.DefaultHandler#startPrefixMapping(java.lang.String, java.lang.String)
322 */
323 @Override
324 public void startPrefixMapping( String prefix,
325 String uri ) {
326 assert uri != null;
327 // Add the prefix to the stack ...
328 LinkedList<String> prefixStack = this.prefixStackByUri.get(uri);
329 if (prefixStack == null) {
330 prefixStack = new LinkedList<String>();
331 this.prefixStackByUri.put(uri, prefixStack);
332 }
333 prefixStack.addFirst(prefix);
334
335 // If the namespace is already registered, then we'll have to register it in the context's registry, too.
336 if (!namespaceRegistry.isRegisteredNamespaceUri(uri)) {
337 // The namespace is not already registered (locally or in the context's registry), so we have to
338 // register it with the context's registry (which the local register then inherits).
339 NamespaceRegistry contextRegistry = context.getNamespaceRegistry();
340 if (contextRegistry.getNamespaceForPrefix(prefix) != null) {
341 // The prefix is already bound, so register and generate a unique prefix
342 context.getNamespaceRegistry().getPrefixForNamespaceUri(uri, true);
343 // Now register locally with the supplied prefix ...
344 namespaceRegistry.register(prefix, uri);
345 } else {
346 context.getNamespaceRegistry().register(prefix, uri);
347 }
348 } else {
349 // It is already registered, but re-register it locally using the supplied prefix ...
350 namespaceRegistry.register(prefix, uri);
351 }
352 }
353
354 /**
355 * {@inheritDoc}
356 *
357 * @see org.xml.sax.helpers.DefaultHandler#endPrefixMapping(java.lang.String)
358 */
359 @Override
360 public void endPrefixMapping( String prefix ) {
361 assert prefix != null;
362 // Get the current URI for this prefix ...
363 String uri = namespaceRegistry.getNamespaceForPrefix(prefix);
364 assert uri != null;
365
366 // Get the previous prefix from the stack ...
367 LinkedList<String> prefixStack = this.prefixStackByUri.get(uri);
368 assert prefixStack != null;
369 assert !prefixStack.isEmpty();
370 String existingPrefix = prefixStack.removeFirst();
371 assert prefix.equals(existingPrefix);
372
373 // If there are no previous prefixes, then remove the mapping ...
374 if (prefixStack.isEmpty()) {
375 namespaceRegistry.unregister(uri);
376 prefixStackByUri.remove(uri);
377 } else {
378 String previous = prefixStack.getFirst();
379 namespaceRegistry.register(previous, uri);
380 }
381 }
382
383 /**
384 * <p>
385 * {@inheritDoc}
386 * </p>
387 *
388 * @see org.xml.sax.ext.DefaultHandler2#startEntity(java.lang.String)
389 */
390 @Override
391 public void startEntity( String name ) {
392 // Record that we've started an entity by capturing the name of the entity ...
393 currentEntityName = name;
394 }
395
396 /**
397 * <p>
398 * {@inheritDoc}
399 * </p>
400 *
401 * @see org.xml.sax.ext.DefaultHandler2#endEntity(java.lang.String)
402 */
403 @Override
404 public void endEntity( String name ) {
405 // currentEntityName is nulled in 'characters(...)', not here.
406 // See DNA-231 for an issue related to this
407 }
408
409 /**
410 * <p>
411 * {@inheritDoc}
412 * </p>
413 *
414 * @see org.xml.sax.ext.DefaultHandler2#startCDATA()
415 */
416 @Override
417 public void startCDATA() {
418 // CDATA sections can start in the middle of element content, so there may already be some
419 // element content already processed ...
420 if (contentBuilder != null) endContent();
421
422 // Prepare builder for concatenating consecutive lines of CDATA
423 cDataContent = new StringBuilder();
424 }
425
426 /**
427 * {@inheritDoc}
428 *
429 * @see org.xml.sax.ext.DefaultHandler2#endCDATA()
430 */
431 @Override
432 public void endCDATA() {
433 // Output CDATA built in characters() method
434 startNode(DnaXmlLexicon.CDATA);
435 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, defaultPrimaryType);
436 output.setProperty(currentPath, DnaXmlLexicon.CDATA_CONTENT, cDataContent.toString());
437 endNode();
438 // Null-out builder to free memory
439 cDataContent = null;
440 }
441
442 /**
443 * {@inheritDoc}
444 *
445 * @see org.xml.sax.helpers.DefaultHandler#characters(char[], int, int)
446 */
447 @Override
448 public void characters( char[] ch,
449 int start,
450 int length ) {
451 String content = String.valueOf(ch, start, length);
452 if (cDataContent != null) {
453 // Processing the characters in the CDATA, so add to the builder
454 cDataContent.append(ch, start, length);
455 // Text within builder will be output at the end of CDATA
456 } else {
457 if (contentBuilder == null) {
458 // This is the first line of content, so we have to create the StringBuilder ...
459 contentBuilder = new StringBuilder();
460 }
461 if (currentEntityName != null) {
462 // This is an entity reference, so rather than use the entity value characters (the content passed
463 // into this method), we want to keep the entity reference ...
464 contentBuilder.append('&').append(currentEntityName).append(';');
465
466 // Normally, 'characters' is called with just the entity replacement characters,
467 // and is called between 'startEntity' and 'endEntity'. However, per DNA-231, some JVMs
468 // use an incorrect ordering: 'startEntity', 'endEntity' and then 'characters', and the
469 // content passed to the 'characters' call not only includes the entity replacement characters
470 // followed by other content. Look for this condition ...
471 String entityValue = entityValues.get(currentEntityName);
472 if (!content.equals(entityValue) && entityValue != null && entityValue.length() < content.length()) {
473 // Per DNA-231, there's extra content after the entity value. So replace the entity value in the
474 // content with the entity reference (not the replacement characters), and add the extra content ...
475 String extraContent = content.substring(entityValue.length());
476 contentBuilder.append(extraContent);
477 }
478 // We're done reading the entity characters, so null it out
479 currentEntityName = null;
480 } else {
481 // Just append the content normally ...
482 contentBuilder.append(content);
483 }
484 // Text within builder will be output when another element or CDATA is encountered
485 }
486 }
487
488 /**
489 * {@inheritDoc}
490 *
491 * @see org.xml.sax.ext.DefaultHandler2#comment(char[], int, int)
492 */
493 @Override
494 public void comment( char[] ch,
495 int start,
496 int length ) {
497 // Output separate nodes for each comment since multiple are allowed
498 startNode(DnaXmlLexicon.COMMENT);
499 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, DnaXmlLexicon.COMMENT);
500 output.setProperty(currentPath, DnaXmlLexicon.COMMENT_CONTENT, String.valueOf(ch, start, length).trim());
501 endNode();
502 }
503
504 /**
505 * {@inheritDoc}
506 *
507 * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String,
508 * org.xml.sax.Attributes)
509 */
510 @Override
511 public void startElement( String uri,
512 String localName,
513 String name,
514 Attributes attributes ) {
515 assert localName != null;
516
517 // Create the node with the name built from the element's name ...
518 Name nodeName = null;
519 if (nameAttribute != null) {
520 try {
521 String jcrNameValue = attributes.getValue(nameAttribute.getNamespaceUri(), nameAttribute.getLocalName());
522 nodeName = nameFactory.create(jcrNameValue);
523 } catch (ValueFormatException e) {
524 }
525 }
526 if (nodeName == null) nodeName = nameFactory.create(uri, localName, decoder);
527 startNode(nodeName);
528
529 // Set the type of the node ...
530 if (defaultPrimaryType != null) {
531 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, defaultPrimaryType);
532 }
533
534 // Now, set each attribute as a property ...
535 for (int i = 0, len = attributes.getLength(); i != len; ++i) {
536 String attributeLocalName = attributes.getLocalName(i);
537 String attributeUri = attributes.getURI(i);
538 Name attributeName = null;
539 if ((attributeUri == null || attributeUri.length() == 0) && attributes.getQName(i).indexOf(':') == -1) {
540 switch (this.attributeScoping) {
541 case INHERIT_ELEMENT_NAMESPACE:
542 attributeName = nameFactory.create(uri, attributeLocalName, decoder);
543 break;
544 case USE_DEFAULT_NAMESPACE:
545 attributeName = nameFactory.create(attributeLocalName, decoder);
546 break;
547 }
548 } else {
549 attributeName = nameFactory.create(attributeUri, attributeLocalName, decoder);
550 }
551 assert attributeName != null;
552 if (JcrLexicon.NAME.equals(attributeName)) {
553 // We don't want to record the "jcr:name" attribute since it won't match the node name ...
554 continue;
555 }
556 Object value = attributes.getValue(i);
557 if (JcrLexicon.PRIMARY_TYPE.equals(attributeName)) {
558 // Convert it to a name ...
559 value = nameFactory.create(value);
560 }
561 output.setProperty(currentPath, attributeName, attributes.getValue(i));
562 }
563 }
564
565 /**
566 * {@inheritDoc}
567 *
568 * @see org.jboss.dna.graph.xml.XmlHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
569 */
570 @Override
571 public void endElement( String uri,
572 String localName,
573 String name ) {
574 // Check if content still needs to be output
575 if (contentBuilder != null) endContent();
576
577 // End the current node ...
578 endNode();
579 }
580
581 /**
582 * <p>
583 * {@inheritDoc}
584 * </p>
585 *
586 * @see org.xml.sax.helpers.DefaultHandler#warning(org.xml.sax.SAXParseException)
587 */
588 @Override
589 public void warning( SAXParseException warning ) {
590 problems.addWarning(warning, XmlSequencerI18n.warningSequencingXmlDocument, warning);
591 }
592
593 /**
594 * {@inheritDoc}
595 *
596 * @see org.xml.sax.helpers.DefaultHandler#error(org.xml.sax.SAXParseException)
597 */
598 @Override
599 public void error( SAXParseException error ) {
600 problems.addError(error, XmlSequencerI18n.errorSequencingXmlDocument, error);
601 }
602
603 /**
604 * {@inheritDoc}
605 *
606 * @see org.xml.sax.helpers.DefaultHandler#fatalError(org.xml.sax.SAXParseException)
607 */
608 @Override
609 public void fatalError( SAXParseException error ) {
610 problems.addError(error, XmlSequencerI18n.errorSequencingXmlDocument, error);
611 }
612
613 private class IndexedName {
614
615 Map<Name, List<IndexedName>> nameToIndexedNamesMap = new HashMap<Name, List<IndexedName>>();
616
617 IndexedName() {
618 }
619 }
620 }