001 /*
002 * JBoss DNA (http://www.jboss.org/dna)
003 * See the COPYRIGHT.txt file distributed with this work for information
004 * regarding copyright ownership. Some portions may be licensed
005 * to Red Hat, Inc. under one or more contributor license agreements.
006 * See the AUTHORS.txt file in the distribution for a full listing of
007 * individual contributors.
008 *
009 * JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
010 * is licensed to you under the terms of the GNU Lesser General Public License as
011 * published by the Free Software Foundation; either version 2.1 of
012 * the License, or (at your option) any later version.
013 *
014 * JBoss DNA is distributed in the hope that it will be useful,
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
017 * Lesser General Public License for more details.
018 *
019 * You should have received a copy of the GNU Lesser General Public
020 * License along with this software; if not, write to the Free
021 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
022 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
023 */
024 package org.jboss.dna.sequencer.xml;
025
026 import java.io.InputStream;
027 import org.jboss.dna.common.text.TextDecoder;
028 import org.jboss.dna.graph.JcrNtLexicon;
029 import org.jboss.dna.graph.property.Name;
030 import org.jboss.dna.graph.sequencer.SequencerContext;
031 import org.jboss.dna.graph.sequencer.SequencerOutput;
032 import org.jboss.dna.graph.sequencer.StreamSequencer;
033 import org.xml.sax.InputSource;
034 import org.xml.sax.SAXNotRecognizedException;
035 import org.xml.sax.SAXNotSupportedException;
036 import org.xml.sax.XMLReader;
037 import org.xml.sax.helpers.XMLReaderFactory;
038
039 /**
040 * A sequencer for XML files, which maintains DTD, entity, comments, and other content. Note that by default the sequencer uses
041 * the {@link XmlSequencer.AttributeScoping#USE_DEFAULT_NAMESPACE default namespace} for unqualified attribute rather than
042 * {@link XmlSequencer.AttributeScoping#INHERIT_ELEMENT_NAMESPACE inheriting the namespace from the element}. (See also
043 * {@link InheritingXmlSequencer}.
044 *
045 * @author John Verhaeg
046 */
047 public class XmlSequencer implements StreamSequencer {
048
049 /**
050 * The choices for how attributes that have no namespace prefix should be assigned a namespace.
051 *
052 * @author Randall Hauch
053 */
054 public enum AttributeScoping {
055 /** The attribute's namespace is the default namespace */
056 USE_DEFAULT_NAMESPACE,
057 /** The attribute's namespace is the same namespace as the containing element */
058 INHERIT_ELEMENT_NAMESPACE;
059 }
060
061 /*package*/static final String DEFAULT_PRIMARY_TYPE = "nt:unstructured";
062 /*package*/static final String DECL_HANDLER_FEATURE = "http://xml.org/sax/properties/declaration-handler";
063 /*package*/static final String ENTITY_RESOLVER_2_FEATURE = "http://xml.org/sax/features/use-entity-resolver2";
064 /*package*/static final String LEXICAL_HANDLER_FEATURE = "http://xml.org/sax/properties/lexical-handler";
065 /*package*/static final String RESOLVE_DTD_URIS_FEATURE = "http://xml.org/sax/features/resolve-dtd-uris";
066 /*package*/static final String LOAD_EXTERNAL_DTDS_FEATURE = "http://apache.org/xml/features/nonvalidating/load-external-dtd";
067
068 private AttributeScoping scoping = AttributeScoping.USE_DEFAULT_NAMESPACE;
069
070 /**
071 * @param scoping Sets scoping to the specified value.
072 */
073 public void setAttributeScoping( AttributeScoping scoping ) {
074 this.scoping = scoping;
075 }
076
077 /**
078 * @return scoping
079 */
080 public AttributeScoping getAttributeScoping() {
081 return scoping;
082 }
083
084 /**
085 * {@inheritDoc}
086 *
087 * @see org.jboss.dna.graph.sequencer.StreamSequencer#sequence(InputStream, SequencerOutput, SequencerContext)
088 */
089 public void sequence( InputStream stream,
090 SequencerOutput output,
091 SequencerContext context ) {
092 XMLReader reader;
093 try {
094 // Set up the XML handler ...
095 Name primaryType = JcrNtLexicon.UNSTRUCTURED;
096 Name nameAttribute = null;
097 TextDecoder decoder = null;
098 XmlSequencerHandler handler = new XmlSequencerHandler(output, context, nameAttribute, primaryType, decoder, scoping);
099 // Create the reader ...
100 reader = XMLReaderFactory.createXMLReader();
101 reader.setContentHandler(handler);
102 reader.setErrorHandler(handler);
103 // Ensure handler acting as entity resolver 2
104 reader.setProperty(DECL_HANDLER_FEATURE, handler);
105 // Ensure handler acting as lexical handler
106 reader.setProperty(LEXICAL_HANDLER_FEATURE, handler);
107 // Ensure handler acting as entity resolver 2
108 setFeature(reader, ENTITY_RESOLVER_2_FEATURE, true);
109 // Prevent loading of external DTDs
110 setFeature(reader, LOAD_EXTERNAL_DTDS_FEATURE, false);
111 // Prevent the resolving of DTD entities into fully-qualified URIS
112 setFeature(reader, RESOLVE_DTD_URIS_FEATURE, false);
113 // Parse XML document
114 reader.parse(new InputSource(stream));
115 } catch (Exception error) {
116 context.getLogger(getClass()).error(error, XmlSequencerI18n.fatalErrorSequencingXmlDocument, error);
117 context.getProblems().addError(error, XmlSequencerI18n.fatalErrorSequencingXmlDocument, error);
118 }
119 }
120
121 /**
122 * Sets the reader's named feature to the supplied value, only if the feature is not already set to that value. This method
123 * does nothing if the feature is not known to the reader.
124 *
125 * @param reader the reader; may not be null
126 * @param featureName the name of the feature; may not be null
127 * @param value the value for the feature
128 */
129 /*package*/static void setFeature( XMLReader reader,
130 String featureName,
131 boolean value ) {
132 try {
133 if (reader.getFeature(featureName) != value) {
134 reader.setFeature(featureName, value);
135 }
136 } catch (SAXNotRecognizedException meansFeatureNotRecognized) {
137 } catch (SAXNotSupportedException meansFeatureNotSupported) {
138 }
139 }
140
141 }