/*
 * ModeShape (http://www.modeshape.org)
 * See the COPYRIGHT.txt file distributed with this work for information
 * regarding copyright ownership. Some portions may be licensed
 * to Red Hat, Inc. under one or more contributor license agreements.
 * See the AUTHORS.txt file in the distribution for a full listing of
 * individual contributors.
 *
 * ModeShape is free software. Unless otherwise indicated, all code in ModeShape
 * is licensed to you under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * ModeShape is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */
package org.modeshape.sequencer.xml;

import java.io.InputStream;
import org.modeshape.common.text.TextDecoder;
import org.modeshape.graph.JcrNtLexicon;
import org.modeshape.graph.property.Name;
import org.modeshape.graph.sequencer.SequencerOutput;
import org.modeshape.graph.sequencer.StreamSequencer;
import org.modeshape.graph.sequencer.StreamSequencerContext;
import org.xml.sax.InputSource;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;

/**
 * A sequencer for XML files, which maintains DTD, entity, comments, and other content. Note that by default the sequencer uses
 * the {@link XmlSequencer.AttributeScoping#USE_DEFAULT_NAMESPACE default namespace} for unqualified attributes rather than
 * {@link XmlSequencer.AttributeScoping#INHERIT_ELEMENT_NAMESPACE inheriting the namespace from the element}. (See also
 * {@link InheritingXmlSequencer}.)
 */
public class XmlSequencer implements StreamSequencer {

    /**
     * The choices for how attributes that have no namespace prefix should be assigned a namespace.
     *
     * @author Randall Hauch
     */
    public enum AttributeScoping {
        /** The attribute's namespace is the default namespace */
        USE_DEFAULT_NAMESPACE,
        /** The attribute's namespace is the same namespace as the containing element */
        INHERIT_ELEMENT_NAMESPACE;
    }

    /** The name of the default primary type, as a string. */
    /*package*/static final String DEFAULT_PRIMARY_TYPE = "nt:unstructured";
    /** SAX <em>property</em> name (despite the constant's suffix) used to register the declaration handler. */
    /*package*/static final String DECL_HANDLER_FEATURE = "http://xml.org/sax/properties/declaration-handler";
    /** SAX feature name that controls use of the {@code EntityResolver2} extension. */
    /*package*/static final String ENTITY_RESOLVER_2_FEATURE = "http://xml.org/sax/features/use-entity-resolver2";
    /** SAX <em>property</em> name (despite the constant's suffix) used to register the lexical handler. */
    /*package*/static final String LEXICAL_HANDLER_FEATURE = "http://xml.org/sax/properties/lexical-handler";
    /** SAX feature name that controls whether system IDs in DTD declarations are resolved into absolute URIs. */
    /*package*/static final String RESOLVE_DTD_URIS_FEATURE = "http://xml.org/sax/features/resolve-dtd-uris";
    /** Apache (Xerces) feature name that controls whether external DTDs are loaded when not validating. */
    /*package*/static final String LOAD_EXTERNAL_DTDS_FEATURE = "http://apache.org/xml/features/nonvalidating/load-external-dtd";

    /** How unqualified attributes are assigned a namespace; passed to the handler on every {@code sequence} call. */
    private AttributeScoping scoping = AttributeScoping.USE_DEFAULT_NAMESPACE;

    /**
     * @param scoping Sets scoping to the specified value.
     */
    public void setAttributeScoping( AttributeScoping scoping ) {
        this.scoping = scoping;
    }

    /**
     * @return scoping
     */
    public AttributeScoping getAttributeScoping() {
        return scoping;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Any exception raised while configuring the reader or parsing the stream is logged and recorded in the context's problems;
     * it is not propagated to the caller.
     * </p>
     *
     * @see org.modeshape.graph.sequencer.StreamSequencer#sequence(InputStream, SequencerOutput, StreamSequencerContext)
     */
    public void sequence( InputStream stream,
                          SequencerOutput output,
                          StreamSequencerContext context ) {
        try {
            // Set up the XML handler ...
            Name primaryType = JcrNtLexicon.UNSTRUCTURED;
            Name nameAttribute = null;
            TextDecoder decoder = null;
            XmlSequencerHandler handler = new XmlSequencerHandler(output, context, nameAttribute, primaryType, decoder, scoping);
            // Create the reader ...
            XMLReader reader = XMLReaderFactory.createXMLReader();
            reader.setContentHandler(handler);
            reader.setErrorHandler(handler);
            // Ensure handler acting as declaration handler
            reader.setProperty(DECL_HANDLER_FEATURE, handler);
            // Ensure handler acting as lexical handler
            reader.setProperty(LEXICAL_HANDLER_FEATURE, handler);
            // Ensure handler acting as entity resolver 2
            setFeature(reader, ENTITY_RESOLVER_2_FEATURE, true);
            // Prevent loading of external DTDs
            setFeature(reader, LOAD_EXTERNAL_DTDS_FEATURE, false);
            // Prevent the resolving of DTD entities into fully-qualified URIS
            setFeature(reader, RESOLVE_DTD_URIS_FEATURE, false);
            // Parse XML document
            reader.parse(new InputSource(stream));
        } catch (Exception error) {
            // Sequencing is a boundary: report the failure through both the logger and the problems collector
            context.getLogger(getClass()).error(error, XmlSequencerI18n.fatalErrorSequencingXmlDocument, error);
            context.getProblems().addError(error, XmlSequencerI18n.fatalErrorSequencingXmlDocument, error);
        }
    }

    /**
     * Sets the reader's named feature to the supplied value, only if the feature is not already set to that value. This method
     * does nothing if the feature is not known to the reader, or if the reader does not support reading or changing it.
     *
     * @param reader the reader; may not be null
     * @param featureName the name of the feature; may not be null
     * @param value the value for the feature
     */
    /*package*/static void setFeature( XMLReader reader,
                                       String featureName,
                                       boolean value ) {
        try {
            if (reader.getFeature(featureName) != value) {
                reader.setFeature(featureName, value);
            }
        } catch (SAXNotRecognizedException meansFeatureNotRecognized) {
            // Deliberately ignored: the reader does not know this feature, so there is nothing to configure
        } catch (SAXNotSupportedException meansFeatureNotSupported) {
            // Deliberately ignored: the reader cannot query or change this feature right now; continue best-effort
        }
    }

}