001 /*
002 * JBoss, Home of Professional Open Source.
003 * Copyright 2008, Red Hat Middleware LLC, and individual contributors
004 * as indicated by the @author tags. See the copyright.txt file in the
005 * distribution for a full listing of individual contributors.
006 *
007 * This is free software; you can redistribute it and/or modify it
008 * under the terms of the GNU Lesser General Public License as
009 * published by the Free Software Foundation; either version 2.1 of
010 * the License, or (at your option) any later version.
011 *
012 * This software is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * You should have received a copy of the GNU Lesser General Public
018 * License along with this software; if not, write to the Free
019 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
021 */
022 package org.jboss.dna.sequencer.xml;
023
024 import java.util.ArrayList;
025 import java.util.HashMap;
026 import java.util.LinkedList;
027 import java.util.List;
028 import java.util.Map;
029 import org.jboss.dna.common.collection.Problems;
030 import org.jboss.dna.common.text.TextDecoder;
031 import org.jboss.dna.common.text.XmlNameEncoder;
032 import org.jboss.dna.common.util.CheckArg;
033 import org.jboss.dna.common.util.StringUtil;
034 import org.jboss.dna.graph.BasicExecutionContext;
035 import org.jboss.dna.graph.ExecutionContext;
036 import org.jboss.dna.graph.JcrLexicon;
037 import org.jboss.dna.graph.properties.Name;
038 import org.jboss.dna.graph.properties.NameFactory;
039 import org.jboss.dna.graph.properties.NamespaceRegistry;
040 import org.jboss.dna.graph.properties.Path;
041 import org.jboss.dna.graph.properties.PathFactory;
042 import org.jboss.dna.graph.properties.PropertyFactory;
043 import org.jboss.dna.graph.properties.ValueFormatException;
044 import org.jboss.dna.graph.properties.basic.LocalNamespaceRegistry;
045 import org.jboss.dna.graph.sequencers.SequencerContext;
046 import org.jboss.dna.graph.sequencers.SequencerOutput;
047 import org.xml.sax.Attributes;
048 import org.xml.sax.SAXParseException;
049 import org.xml.sax.ext.DefaultHandler2;
050
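/**
 * A SAX {@link DefaultHandler2} that records the content of an XML document (elements, attributes, text content, CDATA
 * sections, comments, processing instructions and DTD entity declarations) as node properties on a {@link SequencerOutput}.
 * Warnings and errors reported by the parser are added to the {@link Problems} of the supplied {@link SequencerContext}.
 *
 * @author Randall Hauch
 */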
054 public class XmlSequencerHandler extends DefaultHandler2 {
055
/**
 * The output to which the properties of every generated node are written.
 */
private final SequencerOutput output;

/**
 * The context supplied to the constructor. It provides the {@link Problems} collection and the namespace registry that the
 * handler's local registry is layered on.
 */
private final SequencerContext context;

/**
 * Decoder for XML names, to turn '_xHHHH_' sequences in the XML element and attribute names into the corresponding UTF-16
 * characters. Used when no decoder is supplied to the constructor.
 */
public static TextDecoder DEFAULT_DECODER = new XmlNameEncoder();

/**
 * The default {@link XmlSequencer.AttributeScoping}, used when no scoping is supplied to the constructor.
 */
public static XmlSequencer.AttributeScoping DEFAULT_ATTRIBUTE_SCOPING = XmlSequencer.AttributeScoping.USE_DEFAULT_NAMESPACE;

/**
 * The name of the attribute that should be used for the node name. May be null, in which case the node name is always built
 * from the element name.
 */
protected final Name nameAttribute;

/**
 * The default primary type. When not null it is set as the {@link JcrLexicon#PRIMARY_TYPE} of each element node.
 */
protected final Name defaultPrimaryType;

/**
 * The cached reference to the path factory of the local execution context created in the constructor.
 */
protected final PathFactory pathFactory;

/**
 * The cached reference to the name factory of the local execution context created in the constructor.
 */
protected final NameFactory nameFactory;

/**
 * The cached reference to the property factory of the local execution context created in the constructor.
 */
protected final PropertyFactory propertyFactory;

/**
 * The cached reference to the namespace registry of the local execution context created in the constructor. That context is
 * built with a {@link LocalNamespaceRegistry} wrapping the registry of the supplied {@link SequencerContext}.
 */
protected final NamespaceRegistry namespaceRegistry;

/**
 * The TextDecoder that is used to decode the names.
 */
protected final TextDecoder decoder;

/**
 * The stack of prefixes for each namespace, which is used to keep the {@link #namespaceRegistry local namespace registry} in
 * sync with the namespaces in the XML document.
 */
private final Map<String, LinkedList<String>> prefixStackByUri = new HashMap<String, LinkedList<String>>();

/**
 * Determines which namespace is given to an attribute that has neither a namespace URI nor a prefix.
 */
private final XmlSequencer.AttributeScoping attributeScoping;

/**
 * The path for the node representing the current element. This starts out as the relative path created by the constructor
 * (via {@link PathFactory#createRelativePath()}), and never is shorter than that initial path.
 */
protected Path currentPath;

// Recursive map used to track the number of occurrences of names for elements under a particular path.
// The size of the list stored for a name is used as the same-name-sibling index of the next node with that name.
private Map<Name, List<IndexedName>> nameToIndexedNamesMap = new HashMap<Name, List<IndexedName>>();

// The stack of recursive maps being processed, with the head entry being the map for the current path
private final LinkedList<Map<Name, List<IndexedName>>> nameToIndexedNamesMapStack = new LinkedList<Map<Name, List<IndexedName>>>();

// Name of the entity most recently reported by startEntity(...); reset to null by characters(...)
private String currentEntityName;
// Accumulates the characters of the CDATA section being read; non-null only between startCDATA() and endCDATA()
private StringBuilder cDataContent;
// Accumulates element text content until it is flushed by endContent(); null when there is no pending content
private StringBuilder contentBuilder;
// Receives the warnings and errors reported by the parser
private final Problems problems;
// Replacement values of the internal entities declared so far, keyed by entity name
private final Map<String, String> entityValues = new HashMap<String, String>();
130
131 /**
132 * @param output
133 * @param context
134 * @param nameAttribute
135 * @param defaultPrimaryType
136 * @param textDecoder
137 * @param scoping
138 */
139 XmlSequencerHandler( SequencerOutput output,
140 SequencerContext context,
141 Name nameAttribute,
142 Name defaultPrimaryType,
143 TextDecoder textDecoder,
144 XmlSequencer.AttributeScoping scoping ) {
145 CheckArg.isNotNull(output, "output");
146 CheckArg.isNotNull(context, "context");
147
148 // Use the execution context ...
149 this.output = output;
150 this.context = context;
151 this.problems = context.getProblems();
152 assert this.problems != null;
153
154 this.nameAttribute = nameAttribute;
155 this.defaultPrimaryType = defaultPrimaryType;
156 this.decoder = textDecoder != null ? textDecoder : DEFAULT_DECODER;
157 this.attributeScoping = scoping != null ? scoping : DEFAULT_ATTRIBUTE_SCOPING;
158
159 // Set up a local namespace registry that is kept in sync with the namespaces found in this XML document ...
160 NamespaceRegistry namespaceRegistry = new LocalNamespaceRegistry(this.context.getNamespaceRegistry());
161 final ExecutionContext localContext = new BasicExecutionContext(this.context, namespaceRegistry);
162
163 // Set up references to frequently-used objects in the context ...
164 this.nameFactory = localContext.getValueFactories().getNameFactory();
165 this.pathFactory = localContext.getValueFactories().getPathFactory();
166 this.propertyFactory = localContext.getPropertyFactory();
167 this.namespaceRegistry = localContext.getNamespaceRegistry();
168 assert this.nameFactory != null;
169 assert this.pathFactory != null;
170 assert this.propertyFactory != null;
171 assert this.namespaceRegistry != null;
172
173 // Set up the initial path ...
174 this.currentPath = this.pathFactory.createRelativePath();
175 assert this.currentPath != null;
176 }
177
178 private void startNode( Name name ) {
179 // Check if content still needs to be output
180 if (contentBuilder != null) endContent();
181 // Add name to list of indexed names for this element to ensure we use the correct index (which is the size of the
182 // list)
183 List<IndexedName> indexedNames = nameToIndexedNamesMap.get(name);
184 if (indexedNames == null) {
185 indexedNames = new ArrayList<IndexedName>();
186 nameToIndexedNamesMap.put(name, indexedNames);
187 }
188 IndexedName indexedName = new IndexedName();
189 indexedNames.add(indexedName);
190 // Add element name and the appropriate index to the path.
191 // Per the JCR spec, the index must be relative to same-name sibling nodes
192 currentPath = pathFactory.create(currentPath, name, indexedNames.size()).getNormalizedPath();
193 // currentPath = currentPath.getNormalizedPath();
194 // Add the indexed name map to the stack and set the current map to the new element's map
195 nameToIndexedNamesMapStack.addFirst(nameToIndexedNamesMap);
196 nameToIndexedNamesMap = indexedName.nameToIndexedNamesMap;
197 }
198
199 private void endNode() {
200 // Recover parent's path, namespace, and indexedName map, clearing the ended element's map to free memory
201 currentPath = currentPath.getParent();
202 currentPath = currentPath.getNormalizedPath();
203 nameToIndexedNamesMap.clear();
204 nameToIndexedNamesMap = nameToIndexedNamesMapStack.removeFirst();
205 }
206
207 /**
208 * See if there is any element content that needs to be completed.
209 */
210 protected void endContent() {
211 // Process the content of the element ...
212 String content = StringUtil.normalize(contentBuilder.toString());
213 // Null-out builder to setup for subsequent content.
214 // Must be done before call to startElement below to prevent infinite loop.
215 contentBuilder = null;
216 // Skip if nothing in content but whitespace
217 if (content.length() > 0) {
218 // Create separate node for each content entry since entries can be interspersed amongst child elements
219 startNode(DnaXmlLexicon.ELEMENT_CONTENT);
220 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, DnaXmlLexicon.ELEMENT_CONTENT);
221 output.setProperty(currentPath, DnaXmlLexicon.ELEMENT_CONTENT, content);
222 endNode();
223 }
224 }
225
226 /**
227 * <p>
228 * {@inheritDoc}
229 * </p>
230 *
231 * @see org.xml.sax.helpers.DefaultHandler#startDocument()
232 */
233 @Override
234 public void startDocument() {
235 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, DnaXmlLexicon.DOCUMENT);
236 }
237
238 /**
239 * <p>
240 * {@inheritDoc}
241 * </p>
242 *
243 * @see org.xml.sax.ext.DefaultHandler2#startDTD(java.lang.String, java.lang.String, java.lang.String)
244 */
245 @Override
246 public void startDTD( String name,
247 String publicId,
248 String systemId ) {
249 output.setProperty(currentPath, DnaDtdLexicon.NAME, name);
250 output.setProperty(currentPath, DnaDtdLexicon.PUBLIC_ID, publicId);
251 output.setProperty(currentPath, DnaDtdLexicon.SYSTEM_ID, systemId);
252 }
253
254 /**
255 * <p>
256 * {@inheritDoc}
257 * </p>
258 *
259 * @see org.xml.sax.ext.DefaultHandler2#externalEntityDecl(java.lang.String, java.lang.String, java.lang.String)
260 */
261 @Override
262 public void externalEntityDecl( String name,
263 String publicId,
264 String systemId ) {
265 // Add "synthetic" entity container to path to help prevent name collisions with XML elements
266 startNode(DnaDtdLexicon.ENTITY);
267 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, DnaDtdLexicon.ENTITY);
268 output.setProperty(currentPath, DnaDtdLexicon.NAME, name);
269 if (publicId != null) output.setProperty(currentPath, DnaDtdLexicon.PUBLIC_ID, publicId);
270 if (systemId != null) output.setProperty(currentPath, DnaDtdLexicon.SYSTEM_ID, systemId);
271 endNode();
272 }
273
274 /**
275 * {@inheritDoc}
276 *
277 * @see org.xml.sax.ext.DefaultHandler2#internalEntityDecl(java.lang.String, java.lang.String)
278 */
279 @Override
280 public void internalEntityDecl( String name,
281 String value ) {
282 // Add "synthetic" entity container to path to help prevent name collisions with XML elements
283 startNode(DnaDtdLexicon.ENTITY);
284 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, DnaDtdLexicon.ENTITY);
285 output.setProperty(currentPath, DnaDtdLexicon.NAME, name);
286 output.setProperty(currentPath, DnaDtdLexicon.VALUE, value);
287 // Record the name/value pair ...
288 entityValues.put(name, value);
289 endNode();
290 }
291
292 /**
293 * <p>
294 * {@inheritDoc}
295 * </p>
296 *
297 * @see org.xml.sax.helpers.DefaultHandler#processingInstruction(java.lang.String, java.lang.String)
298 */
299 @Override
300 public void processingInstruction( String target,
301 String data ) {
302 // Output separate nodes for each instruction since multiple are allowed
303 startNode(DnaXmlLexicon.PROCESSING_INSTRUCTION);
304 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, DnaXmlLexicon.PROCESSING_INSTRUCTION);
305 output.setProperty(currentPath, DnaXmlLexicon.TARGET, target.trim());
306 if (data != null) {
307 output.setProperty(currentPath, DnaXmlLexicon.PROCESSING_INSTRUCTION_CONTENT, data.trim());
308 }
309 endNode();
310 }
311
312 /**
313 * {@inheritDoc}
314 * <p>
315 * This method ensures that the namespace is registered with the {@link NamespaceRegistry registry}, using the supplied prefix
316 * to register the namespace if required. Note that because this class does not really use the namespace prefixes to create
317 * {@link Name} objects, no attempt is made to match the XML namespace prefixes.
318 * </p>
319 *
320 * @see org.xml.sax.helpers.DefaultHandler#startPrefixMapping(java.lang.String, java.lang.String)
321 */
322 @Override
323 public void startPrefixMapping( String prefix,
324 String uri ) {
325 assert uri != null;
326 // Add the prefix to the stack ...
327 LinkedList<String> prefixStack = this.prefixStackByUri.get(uri);
328 if (prefixStack == null) {
329 prefixStack = new LinkedList<String>();
330 this.prefixStackByUri.put(uri, prefixStack);
331 }
332 prefixStack.addFirst(prefix);
333
334 // If the namespace is already registered, then we'll have to register it in the context's registry, too.
335 if (!namespaceRegistry.isRegisteredNamespaceUri(uri)) {
336 // The namespace is not already registered (locally or in the context's registry), so we have to
337 // register it with the context's registry (which the local register then inherits).
338 NamespaceRegistry contextRegistry = context.getNamespaceRegistry();
339 if (contextRegistry.getNamespaceForPrefix(prefix) != null) {
340 // The prefix is already bound, so register and generate a unique prefix
341 context.getNamespaceRegistry().getPrefixForNamespaceUri(uri, true);
342 // Now register locally with the supplied prefix ...
343 namespaceRegistry.register(prefix, uri);
344 } else {
345 context.getNamespaceRegistry().register(prefix, uri);
346 }
347 } else {
348 // It is already registered, but re-register it locally using the supplied prefix ...
349 namespaceRegistry.register(prefix, uri);
350 }
351 }
352
353 /**
354 * {@inheritDoc}
355 *
356 * @see org.xml.sax.helpers.DefaultHandler#endPrefixMapping(java.lang.String)
357 */
358 @Override
359 public void endPrefixMapping( String prefix ) {
360 assert prefix != null;
361 // Get the current URI for this prefix ...
362 String uri = namespaceRegistry.getNamespaceForPrefix(prefix);
363 assert uri != null;
364
365 // Get the previous prefix from the stack ...
366 LinkedList<String> prefixStack = this.prefixStackByUri.get(uri);
367 assert prefixStack != null;
368 assert !prefixStack.isEmpty();
369 String existingPrefix = prefixStack.removeFirst();
370 assert prefix.equals(existingPrefix);
371
372 // If there are no previous prefixes, then remove the mapping ...
373 if (prefixStack.isEmpty()) {
374 namespaceRegistry.unregister(uri);
375 prefixStackByUri.remove(uri);
376 } else {
377 String previous = prefixStack.getFirst();
378 namespaceRegistry.register(previous, uri);
379 }
380 }
381
382 /**
383 * <p>
384 * {@inheritDoc}
385 * </p>
386 *
387 * @see org.xml.sax.ext.DefaultHandler2#startEntity(java.lang.String)
388 */
389 @Override
390 public void startEntity( String name ) {
391 // Record that we've started an entity by capturing the name of the entity ...
392 currentEntityName = name;
393 }
394
/**
 * {@inheritDoc}
 * <p>
 * Intentionally does nothing. The entity name captured by {@link #startEntity(String)} is cleared by
 * {@link #characters(char[], int, int)} rather than here, because (per DNA-231) some JVMs report the entity's characters
 * only after this method has been called.
 * </p>
 *
 * @see org.xml.sax.ext.DefaultHandler2#endEntity(java.lang.String)
 */
@Override
public void endEntity( String name ) {
    // currentEntityName is nulled in 'characters(...)', not here.
    // See DNA-231 for an issue related to this
}
407
408 /**
409 * <p>
410 * {@inheritDoc}
411 * </p>
412 *
413 * @see org.xml.sax.ext.DefaultHandler2#startCDATA()
414 */
415 @Override
416 public void startCDATA() {
417 // CDATA sections can start in the middle of element content, so there may already be some
418 // element content already processed ...
419 if (contentBuilder != null) endContent();
420
421 // Prepare builder for concatenating consecutive lines of CDATA
422 cDataContent = new StringBuilder();
423 }
424
425 /**
426 * {@inheritDoc}
427 *
428 * @see org.xml.sax.ext.DefaultHandler2#endCDATA()
429 */
430 @Override
431 public void endCDATA() {
432 // Output CDATA built in characters() method
433 startNode(DnaXmlLexicon.CDATA);
434 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, defaultPrimaryType);
435 output.setProperty(currentPath, DnaXmlLexicon.CDATA_CONTENT, cDataContent.toString());
436 endNode();
437 // Null-out builder to free memory
438 cDataContent = null;
439 }
440
441 /**
442 * {@inheritDoc}
443 *
444 * @see org.xml.sax.helpers.DefaultHandler#characters(char[], int, int)
445 */
446 @Override
447 public void characters( char[] ch,
448 int start,
449 int length ) {
450 String content = String.valueOf(ch, start, length);
451 if (cDataContent != null) {
452 // Processing the characters in the CDATA, so add to the builder
453 cDataContent.append(ch, start, length);
454 // Text within builder will be output at the end of CDATA
455 } else {
456 if (contentBuilder == null) {
457 // This is the first line of content, so we have to create the StringBuilder ...
458 contentBuilder = new StringBuilder();
459 }
460 if (currentEntityName != null) {
461 // This is an entity reference, so rather than use the entity value characters (the content passed
462 // into this method), we want to keep the entity reference ...
463 contentBuilder.append('&').append(currentEntityName).append(';');
464
465 // Normally, 'characters' is called with just the entity replacement characters,
466 // and is called between 'startEntity' and 'endEntity'. However, per DNA-231, some JVMs
467 // use an incorrect ordering: 'startEntity', 'endEntity' and then 'characters', and the
468 // content passed to the 'characters' call not only includes the entity replacement characters
469 // followed by other content. Look for this condition ...
470 String entityValue = entityValues.get(currentEntityName);
471 if (!content.equals(entityValue) && entityValue != null && entityValue.length() < content.length()) {
472 // Per DNA-231, there's extra content after the entity value. So replace the entity value in the
473 // content with the entity reference (not the replacement characters), and add the extra content ...
474 String extraContent = content.substring(entityValue.length());
475 contentBuilder.append(extraContent);
476 }
477 // We're done reading the entity characters, so null it out
478 currentEntityName = null;
479 } else {
480 // Just append the content normally ...
481 contentBuilder.append(content);
482 }
483 // Text within builder will be output when another element or CDATA is encountered
484 }
485 }
486
487 /**
488 * {@inheritDoc}
489 *
490 * @see org.xml.sax.ext.DefaultHandler2#comment(char[], int, int)
491 */
492 @Override
493 public void comment( char[] ch,
494 int start,
495 int length ) {
496 // Output separate nodes for each comment since multiple are allowed
497 startNode(DnaXmlLexicon.COMMENT);
498 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, DnaXmlLexicon.COMMENT);
499 output.setProperty(currentPath, DnaXmlLexicon.COMMENT_CONTENT, String.valueOf(ch, start, length).trim());
500 endNode();
501 }
502
503 /**
504 * {@inheritDoc}
505 *
506 * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String,
507 * org.xml.sax.Attributes)
508 */
509 @Override
510 public void startElement( String uri,
511 String localName,
512 String name,
513 Attributes attributes ) {
514 assert localName != null;
515
516 // Create the node with the name built from the element's name ...
517 Name nodeName = null;
518 if (nameAttribute != null) {
519 try {
520 String jcrNameValue = attributes.getValue(nameAttribute.getNamespaceUri(), nameAttribute.getLocalName());
521 nodeName = nameFactory.create(jcrNameValue);
522 } catch (ValueFormatException e) {
523 }
524 }
525 if (nodeName == null) nodeName = nameFactory.create(uri, localName, decoder);
526 startNode(nodeName);
527
528 // Set the type of the node ...
529 if (defaultPrimaryType != null) {
530 output.setProperty(currentPath, JcrLexicon.PRIMARY_TYPE, defaultPrimaryType);
531 }
532
533 // Now, set each attribute as a property ...
534 for (int i = 0, len = attributes.getLength(); i != len; ++i) {
535 String attributeLocalName = attributes.getLocalName(i);
536 String attributeUri = attributes.getURI(i);
537 Name attributeName = null;
538 if ((attributeUri == null || attributeUri.length() == 0) && attributes.getQName(i).indexOf(':') == -1) {
539 switch (this.attributeScoping) {
540 case INHERIT_ELEMENT_NAMESPACE:
541 attributeName = nameFactory.create(uri, attributeLocalName, decoder);
542 break;
543 case USE_DEFAULT_NAMESPACE:
544 attributeName = nameFactory.create(attributeLocalName, decoder);
545 break;
546 }
547 } else {
548 attributeName = nameFactory.create(attributeUri, attributeLocalName, decoder);
549 }
550 assert attributeName != null;
551 if (JcrLexicon.NAME.equals(attributeName)) {
552 // We don't want to record the "jcr:name" attribute since it won't match the node name ...
553 continue;
554 }
555 Object value = attributes.getValue(i);
556 if (JcrLexicon.PRIMARY_TYPE.equals(attributeName)) {
557 // Convert it to a name ...
558 value = nameFactory.create(value);
559 }
560 output.setProperty(currentPath, attributeName, attributes.getValue(i));
561 }
562 }
563
564 /**
565 * {@inheritDoc}
566 *
567 * @see org.jboss.dna.graph.xml.XmlHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
568 */
569 @Override
570 public void endElement( String uri,
571 String localName,
572 String name ) {
573 // Check if content still needs to be output
574 if (contentBuilder != null) endContent();
575
576 // End the current node ...
577 endNode();
578 }
579
580 /**
581 * <p>
582 * {@inheritDoc}
583 * </p>
584 *
585 * @see org.xml.sax.helpers.DefaultHandler#warning(org.xml.sax.SAXParseException)
586 */
587 @Override
588 public void warning( SAXParseException warning ) {
589 problems.addWarning(warning, XmlSequencerI18n.warningSequencingXmlDocument, warning);
590 }
591
592 /**
593 * {@inheritDoc}
594 *
595 * @see org.xml.sax.helpers.DefaultHandler#error(org.xml.sax.SAXParseException)
596 */
597 @Override
598 public void error( SAXParseException error ) {
599 problems.addError(error, XmlSequencerI18n.errorSequencingXmlDocument, error);
600 }
601
602 /**
603 * {@inheritDoc}
604 *
605 * @see org.xml.sax.helpers.DefaultHandler#fatalError(org.xml.sax.SAXParseException)
606 */
607 @Override
608 public void fatalError( SAXParseException error ) {
609 problems.addError(error, XmlSequencerI18n.errorSequencingXmlDocument, error);
610 }
611
612 private class IndexedName {
613
614 Map<Name, List<IndexedName>> nameToIndexedNamesMap = new HashMap<Name, List<IndexedName>>();
615
616 IndexedName() {
617 }
618 }
619 }