1 /*
2 * ModeShape (http://www.modeshape.org)
3 * See the COPYRIGHT.txt file distributed with this work for information
4 * regarding copyright ownership. Some portions may be licensed
5 * to Red Hat, Inc. under one or more contributor license agreements.
6 * See the AUTHORS.txt file in the distribution for a full listing of
7 * individual contributors.
8 *
9 * ModeShape is free software. Unless otherwise indicated, all code in ModeShape
10 * is licensed to you under the terms of the GNU Lesser General Public License as
11 * published by the Free Software Foundation; either version 2.1 of
12 * the License, or (at your option) any later version.
13 *
14 * ModeShape is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this software; if not, write to the Free
21 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
22 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
23 */
24 package org.modeshape.sequencer.xml;
25
26 import java.io.InputStream;
27 import org.modeshape.common.text.TextDecoder;
28 import org.modeshape.graph.JcrNtLexicon;
29 import org.modeshape.graph.property.Name;
30 import org.modeshape.graph.sequencer.SequencerOutput;
31 import org.modeshape.graph.sequencer.StreamSequencer;
32 import org.modeshape.graph.sequencer.StreamSequencerContext;
33 import org.xml.sax.InputSource;
34 import org.xml.sax.SAXNotRecognizedException;
35 import org.xml.sax.SAXNotSupportedException;
36 import org.xml.sax.XMLReader;
37 import org.xml.sax.helpers.XMLReaderFactory;
38
39 /**
40 * A sequencer for XML files, which maintains DTD, entity, comments, and other content. Note that by default the sequencer uses
41 * the {@link XmlSequencer.AttributeScoping#USE_DEFAULT_NAMESPACE default namespace} for unqualified attribute rather than
42 * {@link XmlSequencer.AttributeScoping#INHERIT_ELEMENT_NAMESPACE inheriting the namespace from the element}. (See also
43 * {@link InheritingXmlSequencer}.
44 */
45 public class XmlSequencer implements StreamSequencer {
46
47 /**
48 * The choices for how attributes that have no namespace prefix should be assigned a namespace.
49 *
50 * @author Randall Hauch
51 */
52 public enum AttributeScoping {
53 /** The attribute's namespace is the default namespace */
54 USE_DEFAULT_NAMESPACE,
55 /** The attribute's namespace is the same namespace as the containing element */
56 INHERIT_ELEMENT_NAMESPACE;
57 }
58
59 /*package*/static final String DEFAULT_PRIMARY_TYPE = "nt:unstructured";
60 /*package*/static final String DECL_HANDLER_FEATURE = "http://xml.org/sax/properties/declaration-handler" target="alexandria_uri">http://xml.org/sax/properties/declaration-handler";
61 /*package*/static final String ENTITY_RESOLVER_2_FEATURE = "http://xml.org/sax/features/use-entity-resolver2" target="alexandria_uri">http://xml.org/sax/features/use-entity-resolver2";
62 /*package*/static final String LEXICAL_HANDLER_FEATURE = "http://xml.org/sax/properties/lexical-handler" target="alexandria_uri">http://xml.org/sax/properties/lexical-handler";
63 /*package*/static final String RESOLVE_DTD_URIS_FEATURE = "http://xml.org/sax/features/resolve-dtd-uris" target="alexandria_uri">http://xml.org/sax/features/resolve-dtd-uris";
64 /*package*/static final String LOAD_EXTERNAL_DTDS_FEATURE = "http://apache.org/xml/features/nonvalidating/load-external-dtd" target="alexandria_uri">http://apache.org/xml/features/nonvalidating/load-external-dtd";
65
66 private AttributeScoping scoping = AttributeScoping.USE_DEFAULT_NAMESPACE;
67
68 /**
69 * @param scoping Sets scoping to the specified value.
70 */
71 public void setAttributeScoping( AttributeScoping scoping ) {
72 this.scoping = scoping;
73 }
74
75 /**
76 * @return scoping
77 */
78 public AttributeScoping getAttributeScoping() {
79 return scoping;
80 }
81
82 /**
83 * {@inheritDoc}
84 *
85 * @see org.modeshape.graph.sequencer.StreamSequencer#sequence(InputStream, SequencerOutput, StreamSequencerContext)
86 */
87 public void sequence( InputStream stream,
88 SequencerOutput output,
89 StreamSequencerContext context ) {
90 XMLReader reader;
91 try {
92 // Set up the XML handler ...
93 Name primaryType = JcrNtLexicon.UNSTRUCTURED;
94 Name nameAttribute = null;
95 TextDecoder decoder = null;
96 XmlSequencerHandler handler = new XmlSequencerHandler(output, context, nameAttribute, primaryType, decoder, scoping);
97 // Create the reader ...
98 reader = XMLReaderFactory.createXMLReader();
99 reader.setContentHandler(handler);
100 reader.setErrorHandler(handler);
101 // Ensure handler acting as entity resolver 2
102 reader.setProperty(DECL_HANDLER_FEATURE, handler);
103 // Ensure handler acting as lexical handler
104 reader.setProperty(LEXICAL_HANDLER_FEATURE, handler);
105 // Ensure handler acting as entity resolver 2
106 setFeature(reader, ENTITY_RESOLVER_2_FEATURE, true);
107 // Prevent loading of external DTDs
108 setFeature(reader, LOAD_EXTERNAL_DTDS_FEATURE, false);
109 // Prevent the resolving of DTD entities into fully-qualified URIS
110 setFeature(reader, RESOLVE_DTD_URIS_FEATURE, false);
111 // Parse XML document
112 reader.parse(new InputSource(stream));
113 } catch (Exception error) {
114 context.getLogger(getClass()).error(error, XmlSequencerI18n.fatalErrorSequencingXmlDocument, error);
115 context.getProblems().addError(error, XmlSequencerI18n.fatalErrorSequencingXmlDocument, error);
116 }
117 }
118
119 /**
120 * Sets the reader's named feature to the supplied value, only if the feature is not already set to that value. This method
121 * does nothing if the feature is not known to the reader.
122 *
123 * @param reader the reader; may not be null
124 * @param featureName the name of the feature; may not be null
125 * @param value the value for the feature
126 */
127 /*package*/static void setFeature( XMLReader reader,
128 String featureName,
129 boolean value ) {
130 try {
131 if (reader.getFeature(featureName) != value) {
132 reader.setFeature(featureName, value);
133 }
134 } catch (SAXNotRecognizedException meansFeatureNotRecognized) {
135 } catch (SAXNotSupportedException meansFeatureNotSupported) {
136 }
137 }
138
139 }