View Javadoc

1   /*
2    * ModeShape (http://www.modeshape.org)
3    * See the COPYRIGHT.txt file distributed with this work for information
4    * regarding copyright ownership.  Some portions may be licensed
5    * to Red Hat, Inc. under one or more contributor license agreements.
6    * See the AUTHORS.txt file in the distribution for a full listing of 
7    * individual contributors.
8    *
9    * ModeShape is free software. Unless otherwise indicated, all code in ModeShape
10   * is licensed to you under the terms of the GNU Lesser General Public License as
11   * published by the Free Software Foundation; either version 2.1 of
12   * the License, or (at your option) any later version.
13   * 
14   * ModeShape is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17   * Lesser General Public License for more details.
18   *
19   * You should have received a copy of the GNU Lesser General Public
20   * License along with this software; if not, write to the Free
21   * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
22   * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
23   */
24  package org.modeshape.jcr;
25  
26  import java.io.IOException;
27  import java.io.OutputStream;
28  import java.text.CharacterIterator;
29  import java.text.StringCharacterIterator;
30  import java.util.Collections;
31  import java.util.HashSet;
32  import java.util.Set;
33  import javax.jcr.Node;
34  import javax.jcr.NodeIterator;
35  import javax.jcr.Property;
36  import javax.jcr.PropertyIterator;
37  import javax.jcr.PropertyType;
38  import javax.jcr.RepositoryException;
39  import javax.jcr.Value;
40  import net.jcip.annotations.NotThreadSafe;
41  import org.modeshape.common.text.TextEncoder;
42  import org.modeshape.common.text.XmlNameEncoder;
43  import org.modeshape.common.util.Base64;
44  import org.modeshape.graph.ExecutionContext;
45  import org.modeshape.graph.property.Name;
46  import org.modeshape.graph.property.ValueFactories;
47  import org.modeshape.graph.property.ValueFactory;
48  import org.xml.sax.ContentHandler;
49  import org.xml.sax.SAXException;
50  import org.xml.sax.helpers.AttributesImpl;
51  
52  /**
53   * Implementation of {@link AbstractJcrExporter} that implements the document view mapping described in section 6.4.2 of the JCR
54   * 1.0 specification.
55   * 
56   * @see JcrSession#exportDocumentView(String, ContentHandler, boolean, boolean)
57   * @see JcrSession#exportDocumentView(String, OutputStream, boolean, boolean)
58   */
59  @NotThreadSafe
60  class JcrDocumentViewExporter extends AbstractJcrExporter {
61  
62      private static final int ENCODE_BUFFER_SIZE = 2 << 15;
63  
64      private static final TextEncoder VALUE_ENCODER = new JcrDocumentViewExporter.JcrDocumentViewPropertyEncoder();
65      private final ValueFactory<String> stringFactory;
66  
67      JcrDocumentViewExporter( JcrSession session ) {
68          super(session, Collections.<String>emptyList());
69          stringFactory = session.getExecutionContext().getValueFactories().getStringFactory();
70      }
71  
72      /**
73       * Exports <code>node</code> (or the subtree rooted at <code>node</code>) into an XML document by invoking SAX events on
74       * <code>contentHandler</code>.
75       * 
76       * @param node the node which should be exported. If <code>noRecursion</code> was set to <code>false</code> in the
77       *        constructor, the entire subtree rooted at <code>node</code> will be exported.
78       * @param contentHandler the SAX content handler for which SAX events will be invoked as the XML document is created.
79       * @param skipBinary if <code>true</code>, indicates that binary properties should not be exported
80       * @param noRecurse if<code>true</code>, indicates that only the given node should be exported, otherwise a recursive export
81       *        and not any of its child nodes.
82       * @throws SAXException if an exception occurs during generation of the XML document
83       * @throws RepositoryException if an exception occurs accessing the content repository
84       */
85      @Override
86      public void exportNode( Node node,
87                              ContentHandler contentHandler,
88                              boolean skipBinary,
89                              boolean noRecurse ) throws RepositoryException, SAXException {
90          ExecutionContext executionContext = session.getExecutionContext();
91  
92          if (node instanceof JcrSharedNode) {
93              // This is a shared node, and per Section 14.7 of the JCR 2.0 specification, they have to be written out
94              // in a special way ...
95              AbstractJcrNode sharedNode = ((JcrSharedNode)node).proxyNode();
96              AttributesImpl atts = new AttributesImpl();
97  
98              // jcr:primaryType = nt:share ...
99              addAttribute(atts, JcrLexicon.PRIMARY_TYPE, PropertyType.NAME, JcrNtLexicon.SHARE);
100 
101             // jcr:uuid = UUID of shared node ...
102             addAttribute(atts, JcrLexicon.UUID, PropertyType.STRING, node.getIdentifier());
103 
104             // Write out the element ...
105             Name name = sharedNode.segment().getName();
106             startElement(contentHandler, name, atts);
107             endElement(contentHandler, name);
108             return;
109         }
110 
111         // If this node is a special xmltext node, output it as raw content (see JCR 1.0 spec - section 6.4.2.3)
112         if (node.getDepth() > 0 && isXmlTextNode(node)) {
113             String xmlCharacters = getXmlCharacters(node);
114             contentHandler.characters(xmlCharacters.toCharArray(), 0, xmlCharacters.length());
115             return;
116         }
117 
118         // Build the attributes for this node's element, but add the primary type first ...
119         AttributesImpl atts = new AttributesImpl();
120         Property primaryType = ((AbstractJcrNode)node).getProperty(JcrLexicon.PRIMARY_TYPE);
121         if (primaryType != null) {
122             addAttribute(atts, primaryType, skipBinary, false);
123         }
124 
125         // And add the remaining properties next ...
126         PropertyIterator properties = node.getProperties();
127         while (properties.hasNext()) {
128             Property prop = properties.nextProperty();
129             addAttribute(atts, prop, skipBinary, true);
130         }
131 
132         // Special case to stub in name for root node as per JCR 1.0 Spec - 6.4.2.2
133         Name name = null;
134         ValueFactories valueFactories = executionContext.getValueFactories();
135         if (node.getDepth() == 0) {
136             name = JcrLexicon.ROOT;
137         } else {
138             name = valueFactories.getNameFactory().create(node.getName());
139         }
140 
141         // Write out the element ...
142         startElement(contentHandler, name, atts);
143         if (!noRecurse) {
144             NodeIterator nodes = node.getNodes();
145             while (nodes.hasNext()) {
146                 exportNode(nodes.nextNode(), contentHandler, skipBinary, noRecurse);
147             }
148         }
149         endElement(contentHandler, name);
150     }
151 
152     protected void addAttribute( AttributesImpl atts,
153                                  Name propertyName,
154                                  int propertyType,
155                                  Object value ) {
156         String valueAsString = VALUE_ENCODER.encode(stringFactory.create(value));
157         String localPropName = getPrefixedName(propertyName);
158         atts.addAttribute(propertyName.getNamespaceUri(),
159                           propertyName.getLocalName(),
160                           localPropName,
161                           PropertyType.nameFromValue(propertyType),
162                           valueAsString);
163 
164     }
165 
166     protected void addAttribute( AttributesImpl atts,
167                                  Property prop,
168                                  boolean skipBinary,
169                                  boolean skipPrimaryType ) throws RepositoryException {
170 
171         Name propName = ((AbstractJcrProperty)prop).name();
172         if (skipPrimaryType && JcrLexicon.PRIMARY_TYPE.equals(propName)) return;
173 
174         String localPropName = getPrefixedName(propName);
175 
176         if (skipBinary && PropertyType.BINARY == prop.getType()) {
177             atts.addAttribute(propName.getNamespaceUri(),
178                               propName.getLocalName(),
179                               localPropName,
180                               PropertyType.nameFromValue(prop.getType()),
181                               "");
182             return;
183         }
184 
185         Value value;
186         if (prop instanceof JcrSingleValueProperty) {
187             value = prop.getValue();
188         } else {
189             // Only output the first value of the multi-valued property.
190             // This is acceptable as per JCR 1.0 Spec (section 6.4.2.5)
191             value = prop.getValues()[0];
192         }
193 
194         String valueAsString;
195         if (PropertyType.BINARY == prop.getType()) {
196             StringBuffer buff = new StringBuffer(ENCODE_BUFFER_SIZE);
197             try {
198                 Base64.InputStream is = new Base64.InputStream(value.getBinary().getStream(), Base64.ENCODE);
199 
200                 byte[] bytes = new byte[ENCODE_BUFFER_SIZE];
201                 int len;
202                 while (-1 != (len = is.read(bytes, 0, ENCODE_BUFFER_SIZE))) {
203                     buff.append(new String(bytes, 0, len));
204                 }
205             } catch (IOException ioe) {
206                 throw new RepositoryException(ioe);
207             }
208             valueAsString = buff.toString();
209         } else {
210             valueAsString = VALUE_ENCODER.encode(value.getString());
211         }
212 
213         atts.addAttribute(propName.getNamespaceUri(),
214                           propName.getLocalName(),
215                           localPropName,
216                           PropertyType.nameFromValue(prop.getType()),
217                           valueAsString);
218     }
219 
220     /**
221      * Indicates whether the current node is an XML text node as per section 6.4.2.3 of the JCR 1.0 specification. XML text nodes
222      * are nodes that have the name &quot;jcr:xmltext&quot; and only one property (besides the mandatory
223      * &quot;jcr:primaryType&quot;). The property must have a property name of &quot;jcr:xmlcharacters&quot;, a type of
224      * <code>String</code>, and does not have multiple values.
225      * <p/>
226      * In practice, this is handled in ModeShape by making XML text nodes have a type of &quot;dna:xmltext&quot;, which enforces
227      * these property characteristics.
228      * 
229      * @param node the node to test
230      * @return whether this node is a special xml text node
231      * @throws RepositoryException if there is an error accessing the repository
232      */
233     private boolean isXmlTextNode( Node node ) throws RepositoryException {
234         // ./xmltext/xmlcharacters exception (see JSR-170 Spec 6.4.2.3)
235 
236         if (getPrefixedName(JcrLexicon.XMLTEXT).equals(node.getName())) {
237             if (node.getNodes().getSize() == 0) {
238 
239                 PropertyIterator properties = node.getProperties();
240                 boolean xmlCharactersFound = false;
241 
242                 while (properties.hasNext()) {
243                     Property property = properties.nextProperty();
244 
245                     if (getPrefixedName(JcrLexicon.PRIMARY_TYPE).equals(property.getName())) {
246                         continue;
247                     }
248 
249                     if (getPrefixedName(JcrLexicon.XMLCHARACTERS).equals(property.getName())) {
250                         xmlCharactersFound = true;
251                         continue;
252                     }
253 
254                     // If the xmltext node has any properties other than primaryType or xmlcharacters, return false;
255                     return false;
256                 }
257 
258                 return xmlCharactersFound;
259             }
260         }
261 
262         return false;
263 
264     }
265 
266     /**
267      * Returns the XML characters for the given node. The node must be an XML text node, as defined in
268      * {@link #isXmlTextNode(Node)}.
269      * 
270      * @param node the node for which XML characters will be retrieved.
271      * @return the xml characters for this node
272      * @throws RepositoryException if there is an error accessing this node
273      */
274     private String getXmlCharacters( Node node ) throws RepositoryException {
275         // ./xmltext/xmlcharacters exception (see JSR-170 Spec 6.4.2.3)
276 
277         assert isXmlTextNode(node);
278 
279         Property xmlCharacters = node.getProperty(getPrefixedName(JcrLexicon.XMLCHARACTERS));
280 
281         assert xmlCharacters != null;
282 
283         if (xmlCharacters.getDefinition().isMultiple()) {
284             StringBuffer buff = new StringBuffer();
285 
286             for (Value value : xmlCharacters.getValues()) {
287                 buff.append(value.getString());
288             }
289 
290             return buff.toString();
291         }
292 
293         return xmlCharacters.getValue().getString();
294     }
295 
296     /**
297      * Special {@link TextEncoder} that implements the subset of XML name encoding suggested by section 6.4.4 of the JCR 1.0.1
298      * specification. This encoder only encodes space (0x20), carriage return (0x0D), new line (0x0A), tab (0x09), and any
299      * underscore characters that might otherwise suggest an encoding, as defined in {@link XmlNameEncoder}.
300      */
301     protected static class JcrDocumentViewPropertyEncoder extends XmlNameEncoder {
302 
303         private static final Set<Character> MAPPED_CHARACTERS;
304 
305         static {
306             MAPPED_CHARACTERS = new HashSet<Character>();
307             MAPPED_CHARACTERS.add(' ');
308             MAPPED_CHARACTERS.add('\r');
309             MAPPED_CHARACTERS.add('\n');
310             MAPPED_CHARACTERS.add('\t');
311 
312         }
313 
314         /**
315          * {@inheritDoc}
316          * 
317          * @see org.modeshape.common.text.TextEncoder#encode(java.lang.String)
318          */
319         // See section 6.4.4 of the JCR 1.0.1 spec for why these hoops must be jumped through
320         @Override
321         public String encode( String text ) {
322             if (text == null) return null;
323             if (text.length() == 0) return text;
324             StringBuilder sb = new StringBuilder();
325             String hex = null;
326             CharacterIterator iter = new StringCharacterIterator(text);
327             for (char c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
328                 if (c == '_') {
329                     // Read the next character (if there is one) ...
330                     char next = iter.next();
331                     if (next == CharacterIterator.DONE) {
332                         sb.append(c);
333                         break;
334                     }
335                     // If the next character is not 'x', then these are just regular characters ...
336                     if (next != 'x') {
337                         sb.append(c).append(next);
338                         continue;
339                     }
340                     // The next character is 'x', so write out the '_' character in encoded form ...
341                     sb.append("_x005f_");
342                     // And then write out the next character ...
343                     sb.append(next);
344                 } else if (!MAPPED_CHARACTERS.contains(c)) {
345                     // Legal characters for an XML Name ...
346                     sb.append(c);
347                 } else {
348                     // All other characters must be escaped with '_xHHHH_' where 'HHHH' is the hex string for the code point
349                     hex = Integer.toHexString(c);
350                     // The hex string excludes the leading '0's, so check the character values so we know how many to prepend
351                     if (c >= '\u0000' && c <= '\u000f') {
352                         sb.append("_x000").append(hex);
353                     } else if (c >= '\u0010' && c <= '\u00ff') {
354                         sb.append("_x00").append(hex);
355                     } else if (c >= '\u0100' && c <= '\u0fff') {
356                         sb.append("_x0").append(hex);
357                     } else {
358                         sb.append("_x").append(hex);
359                     }
360                     sb.append('_');
361                 }
362             }
363             return sb.toString();
364         }
365     }
366 }