View Javadoc

1   /*
2    * ModeShape (http://www.modeshape.org)
3    * See the COPYRIGHT.txt file distributed with this work for information
4    * regarding copyright ownership.  Some portions may be licensed
5    * to Red Hat, Inc. under one or more contributor license agreements.
6    * See the AUTHORS.txt file in the distribution for a full listing of 
7    * individual contributors.
8    *
9    * ModeShape is free software. Unless otherwise indicated, all code in ModeShape
10   * is licensed to you under the terms of the GNU Lesser General Public License as
11   * published by the Free Software Foundation; either version 2.1 of
12   * the License, or (at your option) any later version.
13   * 
14   * ModeShape is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17   * Lesser General Public License for more details.
18   *
19   * You should have received a copy of the GNU Lesser General Public
20   * License along with this software; if not, write to the Free
21   * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
22   * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
23   */
24  package org.modeshape.graph.query.parse;
25  
26  import static org.modeshape.common.text.TokenStream.ANY_VALUE;
27  import java.util.ArrayList;
28  import java.util.Collection;
29  import java.util.Collections;
30  import java.util.List;
31  import java.util.concurrent.atomic.AtomicBoolean;
32  import org.modeshape.common.CommonI18n;
33  import org.modeshape.common.text.ParsingException;
34  import org.modeshape.common.text.Position;
35  import org.modeshape.common.text.TokenStream;
36  import org.modeshape.common.text.TokenStream.CharacterStream;
37  import org.modeshape.common.text.TokenStream.Tokenizer;
38  import org.modeshape.common.text.TokenStream.Tokens;
39  import org.modeshape.common.xml.XmlCharacters;
40  import org.modeshape.graph.GraphI18n;
41  import org.modeshape.graph.property.ValueFormatException;
42  import org.modeshape.graph.query.model.And;
43  import org.modeshape.graph.query.model.ArithmeticOperand;
44  import org.modeshape.graph.query.model.ArithmeticOperator;
45  import org.modeshape.graph.query.model.Between;
46  import org.modeshape.graph.query.model.BindVariableName;
47  import org.modeshape.graph.query.model.ChildNode;
48  import org.modeshape.graph.query.model.ChildNodeJoinCondition;
49  import org.modeshape.graph.query.model.Column;
50  import org.modeshape.graph.query.model.Comparison;
51  import org.modeshape.graph.query.model.Constraint;
52  import org.modeshape.graph.query.model.DescendantNode;
53  import org.modeshape.graph.query.model.DescendantNodeJoinCondition;
54  import org.modeshape.graph.query.model.DynamicOperand;
55  import org.modeshape.graph.query.model.EquiJoinCondition;
56  import org.modeshape.graph.query.model.FullTextSearch;
57  import org.modeshape.graph.query.model.FullTextSearchScore;
58  import org.modeshape.graph.query.model.Join;
59  import org.modeshape.graph.query.model.JoinCondition;
60  import org.modeshape.graph.query.model.JoinType;
61  import org.modeshape.graph.query.model.Length;
62  import org.modeshape.graph.query.model.Limit;
63  import org.modeshape.graph.query.model.Literal;
64  import org.modeshape.graph.query.model.LowerCase;
65  import org.modeshape.graph.query.model.NamedSelector;
66  import org.modeshape.graph.query.model.NodeDepth;
67  import org.modeshape.graph.query.model.NodeLocalName;
68  import org.modeshape.graph.query.model.NodeName;
69  import org.modeshape.graph.query.model.NodePath;
70  import org.modeshape.graph.query.model.Not;
71  import org.modeshape.graph.query.model.Operator;
72  import org.modeshape.graph.query.model.Or;
73  import org.modeshape.graph.query.model.Order;
74  import org.modeshape.graph.query.model.Ordering;
75  import org.modeshape.graph.query.model.PropertyExistence;
76  import org.modeshape.graph.query.model.PropertyValue;
77  import org.modeshape.graph.query.model.Query;
78  import org.modeshape.graph.query.model.QueryCommand;
79  import org.modeshape.graph.query.model.ReferenceValue;
80  import org.modeshape.graph.query.model.SameNode;
81  import org.modeshape.graph.query.model.SameNodeJoinCondition;
82  import org.modeshape.graph.query.model.Selector;
83  import org.modeshape.graph.query.model.SelectorName;
84  import org.modeshape.graph.query.model.SetCriteria;
85  import org.modeshape.graph.query.model.SetQuery;
86  import org.modeshape.graph.query.model.Source;
87  import org.modeshape.graph.query.model.StaticOperand;
88  import org.modeshape.graph.query.model.Subquery;
89  import org.modeshape.graph.query.model.TypeSystem;
90  import org.modeshape.graph.query.model.UpperCase;
91  import org.modeshape.graph.query.model.FullTextSearch.Term;
92  import org.modeshape.graph.query.model.SetQuery.Operation;
93  import org.modeshape.graph.query.model.TypeSystem.TypeFactory;
94  
95  /**
96   * A {@link QueryParser} implementation that parses a subset of SQL select and set queries.
97   * <p>
98   * This grammar is equivalent to the SQL grammar as defined by the JCR 2.0 specification, with some useful additions:
99   * <ul>
100  * <li>"<code>... (UNION|INTERSECT|EXCEPT) [ALL] ...</code>" to combine and merge results from multiple queries</li>
101  * <li>"<code>SELECT DISTINCT ...</code>" to remove duplicates</li>
102  * <li>"<code>LIMIT count [OFFSET number]</code>" clauses to control the number of results returned as well as the number of rows
103  * that should be skipped</li>
104  * <li>Support for additional join types, including "<code>FULL OUTER JOIN</code>" and "<code>CROSS JOIN</code>"</li>
105  * <li>Additional dynamic operands "<code>DEPTH([&lt;selectorName>])</code>" and "<code>PATH([&lt;selectorName>])</code>" that
106  * enables placing constraints on the node depth and path, respectively, and which can be used in a manner similar to "
107  * <code>NAME([&lt;selectorName>])</code>" and "<code>LOCALNAME([&lt;selectorName>])</code>". Note in each of these cases, the
108  * selector name is optional if there is only one selector in the query.</li>
109  * <li>Additional dynamic operand "<code>REFERENCE([&lt;selectorName>.]&lt;propertyName>])</code>" that
110  * enables placing constraints on one or all reference properties, and which can be used in a manner similar to "
111  * <code>PropertyValue([&lt;selectorName>.]&lt;propertyName>)</code>". Note in each of these cases, the
112  * selector name is optional if there is only one selector in the query, and that the property name can be excluded
113  * if the constraint should apply to all reference properties.</li>
114  * <li>Support for the IN clause and NOT IN clause to more easily supply a list of valid discrete static operands: "
115  * <code>&lt;dynamicOperand> [NOT] IN (&lt;staticOperand> {, &lt;staticOperand>})</code>"</li>
116  * <li>Support for the BETWEEN clause: "<code>&lt;dynamicOperand> [NOT] BETWEEN &lt;lowerBoundStaticOperand> [EXCLUSIVE] AND
117  * &lt;upperBoundStaticOperand> [EXCLUSIVE]</code>"</li>
118  * <li>Support for arithmetic operations ('+', '-', '*', '/') between dynamic operands used in <code>WHERE</code> criteria and <code>ORDER BY</code>
119  * clauses: "<code>WHERE &lt;dynamicOperand> + &lt;dynamicOperand> ...</code>" or "<code>ORDER BY (&lt;dynamicOperand> + &lt;dynamicOperand>) [ASC]</code>".
120  * Note that standard operator precedence is used, but grouping by (potentially nested) parentheses is also supported.</li>
121  * </ul>
122  * </p>
123  * <h3>SQL grammar</h3>
124  * <p>
125  * This section defines the complete grammar for the SQL dialect supported by this parser.
126  * </p>
127  * <h4>Queries</h4>
128  * 
129  * <pre>
130  * QueryCommand ::= Query | SetQuery
131  * 
132  * SetQuery ::= Query ('UNION'|'INTERSECT'|'EXCEPT') [ALL] Query
133  *                  { ('UNION'|'INTERSECT'|'EXCEPT') [ALL] Query }
134  * 
135  * Query ::= 'SELECT' ['DISTINCT'] columns
136  *           'FROM' Source
137  *           ['WHERE' Constraint]
138  *           ['ORDER BY' orderings]
139  *           [Limit]
140  * </pre>
141  * 
142  * <h4>Sources</h4>
143  * 
144  * <pre>
145  * Source ::= Selector | Join
146  * 
147  * Selector ::= nodeTypeName ['AS' selectorName]
148  * 
149  * nodeTypeName ::= Name
150  * </pre>
151  * 
152  * <h4>Joins</h4>
153  * 
154  * <pre>
155  * Join ::= left [JoinType] 'JOIN' right 'ON' JoinCondition
156  *          // If JoinType is omitted INNER is assumed.
157  *          
158  * left ::= Source
159  * right ::= Source
160  * 
161  * JoinType ::= Inner | LeftOuter | RightOuter | FullOuter | Cross
162  * 
163  * Inner ::= 'INNER' ['JOIN']
164  * 
165  * LeftOuter ::= 'LEFT JOIN' | 'OUTER JOIN' | 'LEFT OUTER JOIN'
166  * 
167  * RightOuter ::= 'RIGHT OUTER' ['JOIN']
168  * 
169  * FullOuter ::= 'FULL OUTER' ['JOIN']
170  * 
171  * Cross ::= 'CROSS' ['JOIN']
172  * 
173  * JoinCondition ::= EquiJoinCondition | SameNodeJoinCondition | ChildNodeJoinCondition | DescendantNodeJoinCondition
174  * </pre>
175  * 
176  * <h5>Equi-join conditions</h5>
177  * 
178  * <pre>
179  * EquiJoinCondition ::= selector1Name'.'property1Name '=' selector2Name'.'property2Name
180  * 
181  * selector1Name ::= selectorName
182  * selector2Name ::= selectorName
183  * property1Name ::= propertyName
184  * property2Name ::= propertyName
185  * </pre>
186  * 
187  * <h5>Same-node join condition</h5>
188  * 
189  * <pre>
190  * SameNodeJoinCondition ::= 'ISSAMENODE(' selector1Name ',' selector2Name [',' selector2Path] ')'
191  * 
192  * selector2Path ::= Path
193  * </pre>
194  * 
195  * <h5>Child-node join condition</h5>
196  * 
197  * <pre>
198  * ChildNodeJoinCondition ::= 'ISCHILDNODE(' childSelectorName ',' parentSelectorName ')'
199  * 
200  * childSelectorName ::= selectorName
201  * parentSelectorName ::= selectorName
202  * </pre>
203  * 
204  * <h5>Descendant-node join condition</h5>
205  * 
206  * <pre>
207  * DescendantNodeJoinCondition ::= 'ISDESCENDANTNODE(' descendantSelectorName ',' ancestorSelectorName ')'
208  * descendantSelectorName ::= selectorName
209  * ancestorSelectorName ::= selectorName
210  * </pre>
211  * 
212  * <h4>Constraints</h4>
213  * 
214  * <pre>
215  * Constraint ::= ConstraintItem | '(' ConstraintItem ')'
216  * 
217  * ConstraintItem ::= And | Or | Not | Comparison | Between | PropertyExistence | SetConstraint | FullTextSearch | 
218  *                    SameNode | ChildNode | DescendantNode
219  * </pre>
220  * 
221  * <h5>And constraint</h5>
222  * 
223  * <pre>
224  * And ::= constraint1 'AND' constraint2
225  * 
226  * constraint1 ::= Constraint
227  * constraint2 ::= Constraint
228  * </pre>
229  * 
230  * <h5>Or constraint</h5>
231  * 
232  * <pre>
233  * Or ::= constraint1 'OR' constraint2
234  * </pre>
235  * 
236  * <h5>Not constraint</h5>
237  * 
238  * <pre>
239  * Not ::= 'NOT' Constraint
240  * </pre>
241  * 
242  * <h5>Comparison constraint</h5>
243  * 
244  * <pre>
245  * Comparison ::= DynamicOperand Operator StaticOperand
246  * 
247  * Operator ::= '=' | '!=' | '<' | '<=' | '>' | '>=' | 'LIKE'
248  * </pre>
249  * 
250  * <h5>Between constraint</h5>
251  * 
252  * <pre>
253  * Between ::= DynamicOperand ['NOT'] 'BETWEEN' lowerBound ['EXCLUSIVE'] 'AND' upperBound ['EXCLUSIVE']
254  * 
255  * lowerBound ::= StaticOperand
256  * upperBound ::= StaticOperand
257  * </pre>
258  * 
259  * <h5>Property existence constraint</h5>
260  * 
261  * <pre>
262  * PropertyExistence ::= selectorName'.'propertyName 'IS' ['NOT'] 'NULL' | 
263  *                       propertyName 'IS' ['NOT'] 'NULL' &#47;* If only one selector exists in this query *&#47;
264  * 
265  * </pre>
266  * 
267  * <h5>Set constraint</h5>
268  * 
269  * <pre>
270  * SetConstraint ::= selectorName'.'propertyName ['NOT'] 'IN' | 
271  *                       propertyName ['NOT'] 'IN' &#47;* If only one selector exists in this query *&#47;
272  *                       '(' firstStaticOperand {',' additionalStaticOperand } ')'
273  * firstStaticOperand ::= StaticOperand
274  * additionalStaticOperand ::= StaticOperand
275  * </pre>
276  * 
277  * <h5>Full-text search constraint</h5>
278  * 
279  * <pre>
280  * FullTextSearch ::= 'CONTAINS(' ([selectorName'.']propertyName | selectorName'.*') 
281  *                            ',' ''' fullTextSearchExpression''' ')'
282  *                    &#47;* If only one selector exists in this query, explicit specification of the selectorName
283  *                       preceding the propertyName is optional *&#47;
284  * fullTextSearchExpression ::= &#47;* a full-text search expression, see {@link FullTextSearchParser} *&#47;
285  * </pre>
286  * 
287  * <h5>Same-node constraint</h5>
288  * 
289  * <pre>
290  * SameNode ::= 'ISSAMENODE(' [selectorName ','] Path ')' 
291  *                    &#47;* If only one selector exists in this query, explicit specification of the selectorName
292  *                       preceding the propertyName is optional *&#47;
293  * </pre>
294  * 
295  * <h5>Child-node constraint</h5>
296  * 
297  * <pre>
298  * ChildNode ::= 'ISCHILDNODE(' [selectorName ','] Path ')' 
299  *                    &#47;* If only one selector exists in this query, explicit specification of the selectorName
300  *                       preceding the propertyName is optional *&#47;
301  * </pre>
302  * 
303  * <h5>Descendant-node constraint</h5>
304  * 
305  * <pre>
306  * DescendantNode ::= 'ISDESCENDANTNODE(' [selectorName ','] Path ')' 
307  *                    /* If only one selector exists in this query, explicit specification of the selectorName
308  *                       preceding the propertyName is optional *&#47;
309  * </pre>
310  * 
311  * <h5>Paths and names</h5>
312  * 
313  * <pre>
314  * 
315  * Name ::= '[' quotedName ']' | '[' simpleName ']' | simpleName
316  * 
317  * quotedName ::= /* A JCR Name (see the JCR specification) *&#47;
318  * simpleName ::= /* A JCR Name that contains only SQL-legal characters (namely letters, digits, and underscore) *&#47;
319  *
320  * Path ::= '[' quotedPath ']' | '[' simplePath ']' | simplePath
321  *
322  * quotedPath ::= /* A JCR Path that contains non-SQL-legal characters *&#47;
323  * simplePath ::= /* A JCR Path (rather Name) that contains only SQL-legal characters (namely letters, digits, and underscore) *&#47;
324  * </pre>
325  * 
326  * <h4>Static operands</h4>
327  * 
328  * <pre>
329  * StaticOperand ::= Literal | BindVariableValue
330  * </pre>
331  * 
332  * <h5>Literal</h5>
333  * 
334  * <pre>
335  * Literal ::= CastLiteral | UncastLiteral
336  * 
337  * CastLiteral ::= 'CAST(' UncastLiteral ' AS ' PropertyType ')'
338  * 
339  * PropertyType ::= 'STRING' | 'BINARY' | 'DATE' | 'LONG' | 'DOUBLE' | 'DECIMAL' | 'BOOLEAN' | 'NAME' | 'PATH' | 
340  *                  'REFERENCE' | 'WEAKREFERENCE' | 'URI'
341  *                  
342  * UncastLiteral ::= UnquotedLiteral | ''' UnquotedLiteral ''' | '"' UnquotedLiteral '"'
343  * 
344  * UnquotedLiteral ::= /* String form of a JCR Value, as defined in the JCR specification *&#47;
345  * </pre>
346  * 
347  * <h5>Bind variables</h5>
348  * 
349  * <pre>
350  * BindVariableValue ::= '$'bindVariableName
351  * 
352  * bindVariableName ::= /* A string that conforms to the JCR Name syntax, though the prefix does not need to be
353  *                         a registered namespace prefix. *&#47;
354  * </pre>
355  * 
356  * <h4>Dynamic operands</h4>
357  * 
358  * <pre>
359  * DynamicOperand ::= PropertyValue | ReferenceValue | Length | NodeName | NodeLocalName | NodePath | NodeDepth | 
360  *                    FullTextSearchScore | LowerCase | UpperCase | Arithmetic |
361  *                    '(' DynamicOperand ')'
362  * </pre>
363  * <h5>Property value</h5>
364  * <pre>
365  * PropertyValue ::= [selectorName'.'] propertyName
366  *                    /* If only one selector exists in this query, explicit specification of the selectorName
367  *                       preceding the propertyName is optional *&#47;
368  * </pre>
369  * <h5>Reference value</h5>
370  * <pre>
371  * ReferenceValue ::= 'REFERENCE(' selectorName '.' propertyName ')' |
372  *                    'REFERENCE(' selectorName ')' |
373  *                    'REFERENCE()' |
374  *                    /* If only one selector exists in this query, explicit specification of the selectorName
375  *                       preceding the propertyName is optional. Also, the property name may be excluded 
376  *                       if the constraint should apply to any reference property. *&#47;
377  * </pre>
378  * <h5>Property length</h5>
379  * <pre>
380  * Length ::= 'LENGTH(' PropertyValue ')'
381  * </pre>
382  * <h5>Node name</h5>
383  * <pre>
384  * NodeName ::= 'NAME(' [selectorName] ')'
385  *                    /* If only one selector exists in this query, explicit specification of the selectorName
386  *                       is optional *&#47;
387  * </pre>
388  * <h5>Node local name</h5>
389  * <pre>
390  * NodeLocalName ::= 'LOCALNAME(' [selectorName] ')'
391  *                    /* If only one selector exists in this query, explicit specification of the selectorName
392  *                       is optional *&#47;
393  * </pre>
394  * <h5>Node path</h5>
395  * <pre>
396  * NodePath ::= 'PATH(' [selectorName] ')'
397  *                    /* If only one selector exists in this query, explicit specification of the selectorName
398  *                       is optional *&#47;
399  * </pre>
400  * <h5>Node depth</h5>
401  * <pre>
402  * NodeDepth ::= 'DEPTH(' [selectorName] ')'
403  *                    /* If only one selector exists in this query, explicit specification of the selectorName
404  *                       is optional *&#47;
405  * </pre>
406  * <h5>Full-text search score</h5>
407  * <pre>
408  * FullTextSearchScore ::= 'SCORE(' [selectorName] ')'
409  *                    /* If only one selector exists in this query, explicit specification of the selectorName
410  *                       is optional *&#47;
411  * </pre>
412  * <h5>Lowercase</h5>
413  * <pre>
414  * LowerCase ::= 'LOWER(' DynamicOperand ')'
415  * </pre>
416  * <h5>Uppercase</h5>
417  * <pre>
418  * UpperCase ::= 'UPPER(' DynamicOperand ')'
419  * </pre>
420  * <h5>Arithmetic</h5>
421  * <pre>
422  * Arithmetic ::= DynamicOperand ('+'|'-'|'*'|'/') DynamicOperand
423  * </pre>
424  * 
425  * <h4>Ordering</h4>
426  * 
427  * <pre>
428  * orderings ::= Ordering {',' Ordering}
429  * 
430  * Ordering ::= DynamicOperand [Order]
431  * 
432  * Order ::= 'ASC' | 'DESC'
433  * </pre>
434  * 
435  * <h4>Columns</h4>
436  * 
437  * <pre>
438  * columns ::= (Column {',' Column}) | '*'
439  * 
440  * Column ::= ([selectorName'.']propertyName ['AS' columnName]) | (selectorName'.*')
441  *                    /* If only one selector exists in this query, explicit specification of the selectorName
442  *                       preceding the propertyName is optional *&#47;
443  * selectorName ::= Name
444  * propertyName ::= Name
445  * columnName ::= Name
446  * </pre>
447  * 
448  * <h4>Limit</h4>
449  * 
450  * <pre>
451  * Limit ::= 'LIMIT' count [ 'OFFSET' offset ]
452  * count ::= /* Positive integer value *&#47;
453  * offset ::= /* Non-negative integer value *&#47;
454  * </pre>
455  */
456 public class SqlQueryParser implements QueryParser {
457 
458     public static final String LANGUAGE = "SQL";
459 
460     /**
461      * {@inheritDoc}
462      * 
463      * @see org.modeshape.graph.query.parse.QueryParser#getLanguage()
464      */
465     public String getLanguage() {
466         return LANGUAGE;
467     }
468 
469     /**
470      * {@inheritDoc}
471      * 
472      * @see java.lang.Object#toString()
473      */
474     @Override
475     public String toString() {
476         return getLanguage();
477     }
478 
479     /**
480      * {@inheritDoc}
481      * 
482      * @see java.lang.Object#equals(java.lang.Object)
483      */
484     @Override
485     public boolean equals( Object obj ) {
486         if (obj == this) return true;
487         if (obj instanceof QueryParser) {
488             QueryParser that = (QueryParser)obj;
489             return this.getLanguage().equals(that.getLanguage());
490         }
491         return false;
492     }
493 
494     /**
495      * {@inheritDoc}
496      * 
497      * @see org.modeshape.graph.query.parse.QueryParser#parseQuery(String, TypeSystem)
498      */
499     public QueryCommand parseQuery( String query,
500                                     TypeSystem typeSystem ) {
501         Tokenizer tokenizer = new SqlTokenizer(false);
502         TokenStream tokens = new TokenStream(query, tokenizer, false);
503         tokens.start();
504         return parseQueryCommand(tokens, typeSystem);
505     }
506 
507     protected QueryCommand parseQueryCommand( TokenStream tokens,
508                                               TypeSystem typeSystem ) {
509         QueryCommand command = null;
510         if (tokens.matches("SELECT")) {
511             command = parseQuery(tokens, typeSystem);
512             while (tokens.hasNext()) {
513                 if (tokens.matchesAnyOf("UNION", "INTERSECT", "EXCEPT")) {
514                     command = parseSetQuery(tokens, command, typeSystem);
515                 } else if (tokens.matches(')')) {
516                     // There's more in this token stream, but we'll stop reading ...
517                     break;
518                 } else {
519                     Position pos = tokens.previousPosition();
520                     String msg = GraphI18n.unexpectedToken.text(tokens.consume(), pos.getLine(), pos.getColumn());
521                     throw new ParsingException(pos, msg);
522                 }
523             }
524         } else {
525             // We expected SELECT ...
526             Position pos = tokens.nextPosition();
527             String msg = GraphI18n.unexpectedToken.text(tokens.consume(), pos.getLine(), pos.getColumn());
528             throw new ParsingException(pos, msg);
529         }
530         return command;
531     }
532 
533     protected Query parseQuery( TokenStream tokens,
534                                 TypeSystem typeSystem ) {
535         AtomicBoolean isDistinct = new AtomicBoolean(false);
536         List<ColumnExpression> columnExpressions = parseSelect(tokens, isDistinct, typeSystem);
537         Source source = parseFrom(tokens, typeSystem);
538         Constraint constraint = parseWhere(tokens, typeSystem, source);
539         // Parse the order by and limit (can be in any order) ...
540         List<? extends Ordering> orderings = parseOrderBy(tokens, typeSystem, source);
541         Limit limit = parseLimit(tokens);
542         if (orderings == null) parseOrderBy(tokens, typeSystem, source);
543 
544         // Convert the column expressions to columns ...
545         List<Column> columns = new ArrayList<Column>(columnExpressions.size());
546         for (ColumnExpression expression : columnExpressions) {
547             SelectorName selectorName = expression.getSelectorName();
548             String propertyName = expression.getPropertyName();
549             if (selectorName == null) {
550                 if (source instanceof Selector) {
551                     selectorName = ((Selector)source).aliasOrName();
552                 } else {
553                     Position pos = expression.getPosition();
554                     String msg = GraphI18n.mustBeScopedAtLineAndColumn.text(expression, pos.getLine(), pos.getColumn());
555                     throw new ParsingException(pos, msg);
556                 }
557             }
558             columns.add(column(selectorName, propertyName, expression.getColumnName()));
559         }
560         // Now create the query ...
561         return query(source, constraint, orderings, columns, limit, isDistinct.get());
562     }
563 
564     protected SetQuery parseSetQuery( TokenStream tokens,
565                                       QueryCommand leftHandSide,
566                                       TypeSystem typeSystem ) {
567         Operation operation = null;
568         if (tokens.canConsume("UNION")) {
569             operation = Operation.UNION;
570         } else if (tokens.canConsume("INTERSECT")) {
571             operation = Operation.INTERSECT;
572         } else {
573             tokens.consume("EXCEPT");
574             operation = Operation.EXCEPT;
575         }
576         boolean all = tokens.canConsume("ALL");
577         // Parse the next select
578         QueryCommand rightQuery = parseQuery(tokens, typeSystem);
579         return setQuery(leftHandSide, operation, rightQuery, all);
580     }
581 
582     protected List<ColumnExpression> parseSelect( TokenStream tokens,
583                                                   AtomicBoolean isDistinct,
584                                                   TypeSystem typeSystem ) {
585         tokens.consume("SELECT");
586         if (tokens.canConsume("DISTINCT")) isDistinct.set(true);
587         if (tokens.canConsume('*')) {
588             return Collections.emptyList();
589         }
590         List<ColumnExpression> columns = new ArrayList<ColumnExpression>();
591         do {
592             Position position = tokens.nextPosition();
593             String propertyName = parseName(tokens, typeSystem);
594             SelectorName selectorName = null;
595             if (tokens.canConsume('.')) {
596                 // We actually read the selector name, so now read the property name ...
597                 selectorName = new SelectorName(propertyName);
598                 propertyName = parseName(tokens, typeSystem);
599             }
600             String alias = propertyName;
601             if (tokens.canConsume("AS")) alias = parseName(tokens, typeSystem);
602             columns.add(new ColumnExpression(selectorName, propertyName, alias, position));
603         } while (tokens.canConsume(','));
604         return columns;
605     }
606 
607     protected Source parseFrom( TokenStream tokens,
608                                 TypeSystem typeSystem ) {
609         Source source = null;
610         tokens.consume("FROM");
611         source = parseNamedSelector(tokens, typeSystem);
612         while (tokens.hasNext()) {
613             JoinType joinType = null;
614             if (tokens.canConsume("JOIN") || tokens.canConsume("INNER", "JOIN")) {
615                 joinType = JoinType.INNER;
616             } else if (tokens.canConsume("OUTER", "JOIN") || tokens.canConsume("LEFT", "JOIN")
617                        || tokens.canConsume("LEFT", "OUTER", "JOIN")) {
618                 joinType = JoinType.LEFT_OUTER;
619             } else if (tokens.canConsume("RIGHT", "OUTER", "JOIN") || tokens.canConsume("RIGHT", "OUTER")) {
620                 joinType = JoinType.RIGHT_OUTER;
621             } else if (tokens.canConsume("FULL", "OUTER", "JOIN") || tokens.canConsume("FULL", "OUTER")) {
622                 joinType = JoinType.FULL_OUTER;
623             } else if (tokens.canConsume("CROSS", "JOIN") || tokens.canConsume("CROSS")) {
624                 joinType = JoinType.CROSS;
625             }
626             if (joinType == null) break;
627             // Read the name of the selector on the right side of the join ...
628             NamedSelector right = parseNamedSelector(tokens, typeSystem);
629             // Read the join condition ...
630             JoinCondition joinCondition = parseJoinCondition(tokens, typeSystem);
631             // Create the join ...
632             source = join(source, joinType, right, joinCondition);
633         }
634         return source;
635     }
636 
637     protected JoinCondition parseJoinCondition( TokenStream tokens,
638                                                 TypeSystem typeSystem ) {
639         tokens.consume("ON");
640         if (tokens.canConsume("ISSAMENODE", "(")) {
641             SelectorName selector1Name = parseSelectorName(tokens, typeSystem);
642             tokens.consume(',');
643             SelectorName selector2Name = parseSelectorName(tokens, typeSystem);
644             if (tokens.canConsume('.')) {
645                 String path = parsePath(tokens, typeSystem);
646                 tokens.consume(')');
647                 return sameNodeJoinCondition(selector1Name, selector2Name, path);
648             }
649             tokens.consume(')');
650             return sameNodeJoinCondition(selector1Name, selector2Name);
651         }
652         if (tokens.canConsume("ISCHILDNODE", "(")) {
653             SelectorName child = parseSelectorName(tokens, typeSystem);
654             tokens.consume(',');
655             SelectorName parent = parseSelectorName(tokens, typeSystem);
656             tokens.consume(')');
657             return childNodeJoinCondition(parent, child);
658         }
659         if (tokens.canConsume("ISDESCENDANTNODE", "(")) {
660             SelectorName descendant = parseSelectorName(tokens, typeSystem);
661             tokens.consume(',');
662             SelectorName ancestor = parseSelectorName(tokens, typeSystem);
663             tokens.consume(')');
664             return descendantNodeJoinCondition(ancestor, descendant);
665         }
666         SelectorName selector1 = parseSelectorName(tokens, typeSystem);
667         tokens.consume('.');
668         String property1 = parseName(tokens, typeSystem);
669         tokens.consume('=');
670         SelectorName selector2 = parseSelectorName(tokens, typeSystem);
671         tokens.consume('.');
672         String property2 = parseName(tokens, typeSystem);
673         return equiJoinCondition(selector1, property1, selector2, property2);
674     }
675 
676     protected Constraint parseWhere( TokenStream tokens,
677                                      TypeSystem typeSystem,
678                                      Source source ) {
679         if (tokens.canConsume("WHERE")) {
680             return parseConstraint(tokens, typeSystem, source);
681         }
682         return null;
683     }
684 
    /**
     * Parse a constraint from the supplied token stream. The first constraint is one of: a parenthesized constraint, a
     * <code>NOT</code> constraint, one of the <code>CONTAINS</code>, <code>ISSAMENODE</code>, <code>ISCHILDNODE</code> or
     * <code>ISDESCENDANTNODE</code> functions, a property existence test, or a dynamic operand followed by an
     * <code>IN</code> clause, a <code>BETWEEN</code> clause or a comparison. Any <code>AND</code> and then <code>OR</code>
     * continuations that follow are consumed and combined with the first constraint.
     * 
     * @param tokens the stream of tokens positioned at the start of the constraint
     * @param typeSystem the type system that is passed through to the methods that parse names, paths and operands
     * @param source the source of the query; when a function or property is used without a selector name, this must be a
     *        {@link Selector} and its name is used
     * @return the constraint; never null
     * @throws ParsingException if no constraint could be parsed at the current position, or if a function was used without a
     *         selector name and the source is not a single {@link Selector}
     */
    protected Constraint parseConstraint( TokenStream tokens,
                                          TypeSystem typeSystem,
                                          Source source ) {
        Constraint constraint = null;
        // Remember where this constraint starts, since the error messages below report this position ...
        Position pos = tokens.nextPosition();
        if (tokens.canConsume("(")) {
            // A constraint wrapped in parentheses; the closing parenthesis is required ...
            constraint = parseConstraint(tokens, typeSystem, source);
            tokens.consume(")");
        } else if (tokens.canConsume("NOT")) {
            // The parentheses around the negated constraint are both optional and are consumed independently ...
            // NOTE(review): the nested call also consumes any trailing AND/OR, so 'NOT a AND b' appears to be parsed as
            // 'NOT (a AND b)' -- confirm this is intended.
            tokens.canConsume('(');
            constraint = not(parseConstraint(tokens, typeSystem, source));
            tokens.canConsume(')');
        } else if (tokens.canConsume("CONTAINS", "(")) {
            // Either 'selectorName.propertyName', or 'selectorName.*' or 'propertyName' ...
            String first = tokens.consume();
            SelectorName selectorName = null;
            String propertyName = null;
            if (tokens.canConsume(".", "*")) {
                // 'selectorName.*', so the property name is left as null ...
                selectorName = new SelectorName(removeBracketsAndQuotes(first));
            } else if (tokens.canConsume('.')) {
                selectorName = new SelectorName(removeBracketsAndQuotes(first));
                propertyName = parseName(tokens, typeSystem);
            } else {
                // Only a property name was given, so the selector must come from the (single) source ...
                if (!(source instanceof Selector)) {
                    String msg = GraphI18n.functionIsAmbiguous.text("CONTAINS()", pos.getLine(), pos.getColumn());
                    throw new ParsingException(pos, msg);
                }
                selectorName = ((Selector)source).name();
                propertyName = removeBracketsAndQuotes(first);
            }
            tokens.consume(',');

            // Followed by the full text search expression ...
            String expression = removeBracketsAndQuotes(tokens.consume(), false); // don't remove nested quotes
            Term term = parseFullTextSearchExpression(expression, tokens.previousPosition());
            tokens.consume(")");
            constraint = fullTextSearch(selectorName, propertyName, expression, term);
        } else if (tokens.canConsume("ISSAMENODE", "(")) {
            SelectorName selectorName = null;
            if (tokens.matches(ANY_VALUE, ")")) {
                // Only one argument (the path), so the selector must come from the (single) source ...
                if (!(source instanceof Selector)) {
                    String msg = GraphI18n.functionIsAmbiguous.text("ISSAMENODE()", pos.getLine(), pos.getColumn());
                    throw new ParsingException(pos, msg);
                }
                selectorName = ((Selector)source).name();
            } else {
                selectorName = parseSelectorName(tokens, typeSystem);
                tokens.consume(',');
            }
            String path = parsePath(tokens, typeSystem);
            tokens.consume(')');
            constraint = sameNode(selectorName, path);
        } else if (tokens.canConsume("ISCHILDNODE", "(")) {
            SelectorName selectorName = null;
            if (tokens.matches(ANY_VALUE, ")")) {
                // Only one argument (the path), so the selector must come from the (single) source ...
                if (!(source instanceof Selector)) {
                    String msg = GraphI18n.functionIsAmbiguous.text("ISCHILDNODE()", pos.getLine(), pos.getColumn());
                    throw new ParsingException(pos, msg);
                }
                selectorName = ((Selector)source).name();
            } else {
                selectorName = parseSelectorName(tokens, typeSystem);
                tokens.consume(',');
            }
            String path = parsePath(tokens, typeSystem);
            tokens.consume(')');
            constraint = childNode(selectorName, path);
        } else if (tokens.canConsume("ISDESCENDANTNODE", "(")) {
            SelectorName selectorName = null;
            if (tokens.matches(ANY_VALUE, ")")) {
                // Only one argument (the path), so the selector must come from the (single) source ...
                if (!(source instanceof Selector)) {
                    String msg = GraphI18n.functionIsAmbiguous.text("ISDESCENDANTNODE()", pos.getLine(), pos.getColumn());
                    throw new ParsingException(pos, msg);
                }
                selectorName = ((Selector)source).name();
            } else {
                selectorName = parseSelectorName(tokens, typeSystem);
                tokens.consume(',');
            }
            String path = parsePath(tokens, typeSystem);
            tokens.consume(')');
            constraint = descendantNode(selectorName, path);
        } else {
            // First try a property existence ...
            Position pos2 = tokens.nextPosition();
            constraint = parsePropertyExistance(tokens, typeSystem, source);
            if (constraint == null) {
                // Try to parse as a dynamic operand ...
                DynamicOperand left = parseDynamicOperand(tokens, typeSystem, source);
                if (left != null) {
                    if (tokens.matches('(') && left instanceof PropertyValue) {
                        // This was probably a bad function that we parsed as the start of a dynamic operation ...
                        String name = ((PropertyValue)left).propertyName(); // this may be the function name
                        String msg = GraphI18n.expectingConstraintCondition.text(name, pos2.getLine(), pos2.getColumn());
                        throw new ParsingException(pos, msg);
                    }
                    if (tokens.matches("IN", "(") || tokens.matches("NOT", "IN", "(")) {
                        // '[NOT] IN (...)'; the 'IN' keyword itself is consumed by parseInClause ...
                        boolean not = tokens.canConsume("NOT");
                        Collection<StaticOperand> staticOperands = parseInClause(tokens, typeSystem);
                        constraint = setCriteria(left, staticOperands);
                        if (not) constraint = not(constraint);
                    } else if (tokens.matches("BETWEEN") || tokens.matches("NOT", "BETWEEN")) {
                        // '[NOT] BETWEEN lower [EXCLUSIVE] AND upper [EXCLUSIVE]'; bounds are inclusive unless marked ...
                        boolean not = tokens.canConsume("NOT");
                        tokens.consume("BETWEEN");
                        StaticOperand lowerBound = parseStaticOperand(tokens, typeSystem);
                        boolean lowerInclusive = !tokens.canConsume("EXCLUSIVE");
                        tokens.consume("AND");
                        StaticOperand upperBound = parseStaticOperand(tokens, typeSystem);
                        boolean upperInclusive = !tokens.canConsume("EXCLUSIVE");
                        constraint = between(left, lowerBound, upperBound, lowerInclusive, upperInclusive);
                        if (not) constraint = not(constraint);
                    } else {
                        // Otherwise this must be a comparison of the dynamic operand to a static operand ...
                        Operator operator = parseComparisonOperator(tokens);
                        StaticOperand right = parseStaticOperand(tokens, typeSystem);
                        constraint = comparison(left, operator, right);
                    }
                }
                // else continue ...
            }
        }
        if (constraint == null) {
            // Nothing could be parsed, so report the next token as the unexpected one ...
            String msg = GraphI18n.expectingConstraintCondition.text(tokens.consume(), pos.getLine(), pos.getColumn());
            throw new ParsingException(pos, msg);
        }
        // AND has higher precedence than OR, so we need to evaluate it first ...
        // NOTE(review): each right-hand side is parsed by a recursive call that itself consumes any following AND/OR, so
        // 'a AND b OR c' appears to be grouped as 'a AND (b OR c)' rather than '(a AND b) OR c' -- confirm.
        while (tokens.canConsume("AND")) {
            Constraint rhs = parseConstraint(tokens, typeSystem, source);
            if (rhs != null) constraint = and(constraint, rhs);
        }
        while (tokens.canConsume("OR")) {
            Constraint rhs = parseConstraint(tokens, typeSystem, source);
            if (rhs != null) constraint = or(constraint, rhs);
        }
        return constraint;
    }
820 
821     protected List<StaticOperand> parseInClause( TokenStream tokens,
822                                                  TypeSystem typeSystem ) {
823         List<StaticOperand> result = new ArrayList<StaticOperand>();
824         tokens.consume("IN");
825         tokens.consume("(");
826         if (!tokens.canConsume(")")) {
827             // Not empty, so read the static operands ...
828             do {
829                 result.add(parseStaticOperand(tokens, typeSystem));
830             } while (tokens.canConsume(','));
831             tokens.consume(")");
832         }
833         return result;
834     }
835 
836     protected Term parseFullTextSearchExpression( String expression,
837                                                   Position startOfExpression ) {
838         try {
839             return new FullTextSearchParser().parse(expression);
840         } catch (ParsingException e) {
841             // Convert the position in the exception into a position in the query.
842             Position queryPos = startOfExpression.add(e.getPosition());
843             throw new ParsingException(queryPos, e.getMessage());
844         }
845     }
846 
847     protected Operator parseComparisonOperator( TokenStream tokens ) {
848         if (tokens.canConsume("=")) return Operator.EQUAL_TO;
849         if (tokens.canConsume("LIKE")) return Operator.LIKE;
850         if (tokens.canConsume("!", "=")) return Operator.NOT_EQUAL_TO;
851         if (tokens.canConsume("<", ">")) return Operator.NOT_EQUAL_TO;
852         if (tokens.canConsume("<", "=")) return Operator.LESS_THAN_OR_EQUAL_TO;
853         if (tokens.canConsume(">", "=")) return Operator.GREATER_THAN_OR_EQUAL_TO;
854         if (tokens.canConsume("<")) return Operator.LESS_THAN;
855         if (tokens.canConsume(">")) return Operator.GREATER_THAN;
856         Position pos = tokens.nextPosition();
857         String msg = GraphI18n.expectingComparisonOperator.text(tokens.consume(), pos.getLine(), pos.getColumn());
858         throw new ParsingException(pos, msg);
859     }
860 
861     protected List<Ordering> parseOrderBy( TokenStream tokens,
862                                            TypeSystem typeSystem,
863                                            Source source ) {
864         if (tokens.canConsume("ORDER", "BY")) {
865             List<Ordering> orderings = new ArrayList<Ordering>();
866             do {
867                 orderings.add(parseOrdering(tokens, typeSystem, source));
868             } while (tokens.canConsume(','));
869             return orderings;
870         }
871         return null;
872     }
873 
874     protected Ordering parseOrdering( TokenStream tokens,
875                                       TypeSystem typeSystem,
876                                       Source source ) {
877         DynamicOperand operand = parseDynamicOperand(tokens, typeSystem, source);
878         Order order = Order.ASCENDING;
879         if (tokens.canConsume("DESC")) order = Order.DESCENDING;
880         if (tokens.canConsume("ASC")) order = Order.ASCENDING;
881         return ordering(operand, order);
882     }
883 
884     protected Constraint parsePropertyExistance( TokenStream tokens,
885                                                  TypeSystem typeSystem,
886                                                  Source source ) {
887         if (tokens.matches(ANY_VALUE, ".", ANY_VALUE, "IS", "NOT", "NULL")
888             || tokens.matches(ANY_VALUE, ".", ANY_VALUE, "IS", "NULL") || tokens.matches(ANY_VALUE, "IS", "NOT", "NULL")
889             || tokens.matches(ANY_VALUE, "IS", "NULL")) {
890             Position pos = tokens.nextPosition();
891             String firstWord = tokens.consume();
892             SelectorName selectorName = null;
893             String propertyName = null;
894             if (tokens.canConsume('.')) {
895                 // We actually read the selector name, so now read the property name ...
896                 selectorName = new SelectorName(firstWord);
897                 propertyName = parseName(tokens, typeSystem);
898             } else {
899                 // Otherwise the source should be a single named selector
900                 if (!(source instanceof Selector)) {
901                     String msg = GraphI18n.mustBeScopedAtLineAndColumn.text(firstWord, pos.getLine(), pos.getColumn());
902                     throw new ParsingException(pos, msg);
903                 }
904                 selectorName = ((Selector)source).name();
905                 propertyName = parseName(firstWord, typeSystem);
906             }
907             if (tokens.canConsume("IS", "NOT", "NULL")) {
908                 return propertyExistence(selectorName, propertyName);
909             }
910             tokens.consume("IS", "NULL");
911             return not(propertyExistence(selectorName, propertyName));
912         }
913         return null;
914     }
915 
916     protected StaticOperand parseStaticOperand( TokenStream tokens,
917                                                 TypeSystem typeSystem ) {
918         if (tokens.canConsume('$')) {
919             // The variable name must conform to a valid prefix, which is defined as a valid NCName ...
920             String value = tokens.consume();
921             if (!XmlCharacters.isValidNcName(value)) {
922                 Position pos = tokens.previousPosition();
923                 String msg = GraphI18n.bindVariableMustConformToNcName.text(value, pos.getLine(), pos.getColumn());
924                 throw new ParsingException(pos, msg);
925             }
926             return bindVariableName(value);
927         }
928         if (tokens.canConsume('(')) {
929             // Sometimes the subqueries are wrapped with parentheses ...
930             StaticOperand result = parseStaticOperand(tokens, typeSystem);
931             tokens.consume(')');
932             return result;
933         }
934         if (tokens.matches("SELECT")) {
935             // This is a subquery. This object is stateless, so we can reuse this object ...
936             QueryCommand subqueryExpression = parseQueryCommand(tokens, typeSystem);
937             return subquery(subqueryExpression);
938         }
939         return parseLiteral(tokens, typeSystem);
940     }
941 
942     protected Subquery subquery( QueryCommand queryCommand ) {
943         return new Subquery(queryCommand);
944     }
945 
946     protected Literal parseLiteral( TokenStream tokens,
947                                     TypeSystem typeSystem ) {
948         if (tokens.canConsume("CAST", "(")) {
949             // Get the value that is to be cast ...
950             Position pos = tokens.nextPosition();
951             Object value = parseLiteralValue(tokens, typeSystem);
952             // Figure out the type we're supposed to cast to ...
953             tokens.consume("AS");
954             String typeName = tokens.consume();
955             TypeFactory<?> typeFactory = typeSystem.getTypeFactory(typeName);
956             if (typeFactory == null) {
957                 Position typePos = tokens.previousPosition();
958                 String msg = GraphI18n.invalidPropertyType.text(tokens.consume(), typePos.getLine(), typePos.getColumn());
959                 throw new ParsingException(typePos, msg);
960             }
961             // Convert the supplied value to the desired value ...
962             tokens.consume(')');
963             try {
964                 Object literal = typeFactory.create(value);
965                 return literal(typeSystem, literal);
966             } catch (ValueFormatException e) {
967                 String msg = GraphI18n.valueCannotBeCastToSpecifiedType.text(value,
968                                                                              pos.getLine(),
969                                                                              pos.getColumn(),
970                                                                              typeFactory.getTypeName(),
971                                                                              e.getMessage());
972                 throw new ParsingException(pos, msg);
973             }
974         }
975         // Just create a literal out of the supplied value ...
976         return literal(typeSystem, parseLiteralValue(tokens, typeSystem));
977     }
978 
979     protected Object parseLiteralValue( TokenStream tokens,
980                                         TypeSystem typeSystem ) {
981         if (tokens.matches(SqlTokenizer.QUOTED_STRING)) {
982             return removeBracketsAndQuotes(tokens.consume());
983         }
984         TypeFactory<Boolean> booleanFactory = typeSystem.getBooleanFactory();
985         if (booleanFactory != null) {
986             if (tokens.canConsume("TRUE")) return booleanFactory.asString(Boolean.TRUE);
987             if (tokens.canConsume("FALSE")) return booleanFactory.asString(Boolean.FALSE);
988         }
989 
990         // Otherwise it is an unquoted literal value ...
991         Position pos = tokens.nextPosition();
992         String sign = "";
993         if (tokens.canConsume('-')) sign = "-";
994         else if (tokens.canConsume('+')) sign = "";
995 
996         // Try to parse this value as a number ...
997         String integral = tokens.consume();
998         TypeFactory<Double> doubleFactory = typeSystem.getDoubleFactory();
999         if (doubleFactory != null) {
1000             String decimal = null;
1001             if (tokens.canConsume('.')) {
1002                 decimal = tokens.consume();
1003                 String value = sign + integral + "." + decimal;
1004                 if (decimal.endsWith("e") && (tokens.matches('+') || tokens.matches('-'))) {
1005                     // There's more to the number ...
1006                     value = value + tokens.consume() + tokens.consume(); // +/-EXP
1007                 }
1008                 try {
1009                     // Convert to a double and then back to a string to get canonical form ...
1010                     return doubleFactory.asString(doubleFactory.create(value));
1011                 } catch (ValueFormatException e) {
1012                     String msg = GraphI18n.expectingLiteralAndUnableToParseAsDouble.text(value, pos.getLine(), pos.getColumn());
1013                     throw new ParsingException(pos, msg);
1014                 }
1015             }
1016         }
1017         TypeFactory<?> dateTimeFactory = typeSystem.getDateTimeFactory();
1018         if (dateTimeFactory != null) {
1019             if (tokens.canConsume('-')) {
1020                 // Looks like a date (see Section 3.6.4.3 of the JCR 2.0 specification) ...
1021                 // sYYYY-MM-DDThh:mm:ss.sssTZD
1022                 String year = integral;
1023                 String month = tokens.consume();
1024                 tokens.consume('-');
1025                 String dateAndHour = tokens.consume();
1026                 tokens.consume(':');
1027                 String minutes = tokens.consume();
1028                 tokens.consume(':');
1029                 String seconds = tokens.consume();
1030                 tokens.consume('.');
1031                 String subSeconds = tokens.consume(); // should contain 'T' separator and possibly the TZ name and (if no +/-)
1032                 // hours
1033                 String tzSign = "+";
1034                 String tzHours = "00";
1035                 String tzMinutes = "00";
1036                 String tzDelim = ":";
1037                 if (tokens.canConsume('+')) {
1038                     // the fractionalSeconds did NOT contain the tzHours ...
1039                     tzHours = tokens.consume();
1040                     if (tokens.canConsume(':')) tzMinutes = tokens.consume();
1041                 } else if (tokens.canConsume('-')) {
1042                     // the fractionalSeconds did NOT contain the tzHours ...
1043                     tzSign = "-";
1044                     tzHours = tokens.consume();
1045                     if (tokens.canConsume(':')) tzMinutes = tokens.consume();
1046                 } else if (tokens.canConsume(':')) {
1047                     // fractionalSeconds DID contain the TZ hours (without + or -)
1048                     tzHours = tzSign = "";
1049                     if (tokens.canConsume(':')) tzMinutes = tokens.consume();
1050                 } else if (subSeconds.endsWith("Z")) {
1051                     tzSign = tzMinutes = tzDelim = tzHours = "";
1052                 } else if (subSeconds.endsWith("UTC")) {
1053                     subSeconds = subSeconds.length() > 3 ? subSeconds.substring(0, subSeconds.length() - 3) : subSeconds;
1054                 }
1055                 String value = sign + year + "-" + month + "-" + dateAndHour + ":" + minutes + ":" + seconds + "." + subSeconds
1056                                + tzSign + tzHours + tzDelim + tzMinutes;
1057                 try {
1058                     // Convert to a date and then back to a string to get canonical form ...
1059                     Object dateTime = dateTimeFactory.create(value);
1060                     return dateTimeFactory.asString(dateTime);
1061                 } catch (ValueFormatException e) {
1062                     String msg = GraphI18n.expectingLiteralAndUnableToParseAsDate.text(value, pos.getLine(), pos.getColumn());
1063                     throw new ParsingException(pos, msg);
1064                 }
1065             }
1066         }
1067         TypeFactory<Long> longFactory = typeSystem.getLongFactory();
1068         // try to parse an a long ...
1069         String value = sign + integral;
1070         try {
1071             // Convert to a long and then back to a string to get canonical form ...
1072             return longFactory.asString(longFactory.create(value));
1073         } catch (ValueFormatException e) {
1074             String msg = GraphI18n.expectingLiteralAndUnableToParseAsLong.text(value, pos.getLine(), pos.getColumn());
1075             throw new ParsingException(pos, msg);
1076         }
1077     }
1078 
    /**
     * Parse a dynamic operand. The operand is one of: a dynamic operand wrapped in parentheses, one of the
     * <code>LENGTH</code>, <code>LOWER</code>, <code>UPPER</code>, <code>NAME</code>, <code>LOCALNAME</code>,
     * <code>SCORE</code>, <code>DEPTH</code>, <code>PATH</code> or <code>REFERENCE</code> functions, or a property value. If
     * the operand is followed by one of the arithmetic operators '+', '-', '*' or '/', the operator and the dynamic operand
     * after it are also consumed and an arithmetic operand is returned.
     * 
     * @param tokens the stream of tokens
     * @param typeSystem the type system passed through when parsing names
     * @param source the source of the query; must be a {@link Selector} when <code>NAME()</code>, <code>LOCALNAME()</code>,
     *        <code>SCORE()</code>, <code>DEPTH()</code> or <code>PATH()</code> is used without a selector name
     * @return the dynamic operand
     * @throws ParsingException if one of the functions listed above is used without a selector name and the source is not a
     *         single {@link Selector}
     */
    protected DynamicOperand parseDynamicOperand( TokenStream tokens,
                                                  TypeSystem typeSystem,
                                                  Source source ) {
        DynamicOperand result = null;
        // Remember where the operand starts, since the error messages below report this position ...
        Position pos = tokens.nextPosition();
        if (tokens.canConsume('(')) {
            result = parseDynamicOperand(tokens, typeSystem, source);
            tokens.consume(")");
        } else if (tokens.canConsume("LENGTH", "(")) {
            result = length(parsePropertyValue(tokens, typeSystem, source));
            tokens.consume(")");
        } else if (tokens.canConsume("LOWER", "(")) {
            result = lowerCase(parseDynamicOperand(tokens, typeSystem, source));
            tokens.consume(")");
        } else if (tokens.canConsume("UPPER", "(")) {
            result = upperCase(parseDynamicOperand(tokens, typeSystem, source));
            tokens.consume(")");
        } else if (tokens.canConsume("NAME", "(")) {
            if (tokens.canConsume(")")) {
                // No selector name was given, so use the single selector. Note that this returns immediately, so the
                // check for a following arithmetic operator (below) is not performed in this case ...
                if (source instanceof Selector) {
                    return nodeName(((Selector)source).name());
                }
                String msg = GraphI18n.functionIsAmbiguous.text("NAME()", pos.getLine(), pos.getColumn());
                throw new ParsingException(pos, msg);
            }
            result = nodeName(parseSelectorName(tokens, typeSystem));
            tokens.consume(")");
        } else if (tokens.canConsume("LOCALNAME", "(")) {
            if (tokens.canConsume(")")) {
                // No selector name was given, so use the single selector (and return immediately) ...
                if (source instanceof Selector) {
                    return nodeLocalName(((Selector)source).name());
                }
                String msg = GraphI18n.functionIsAmbiguous.text("LOCALNAME()", pos.getLine(), pos.getColumn());
                throw new ParsingException(pos, msg);
            }
            result = nodeLocalName(parseSelectorName(tokens, typeSystem));
            tokens.consume(")");
        } else if (tokens.canConsume("SCORE", "(")) {
            if (tokens.canConsume(")")) {
                // No selector name was given, so use the single selector (and return immediately) ...
                if (source instanceof Selector) {
                    return fullTextSearchScore(((Selector)source).name());
                }
                String msg = GraphI18n.functionIsAmbiguous.text("SCORE()", pos.getLine(), pos.getColumn());
                throw new ParsingException(pos, msg);
            }
            result = fullTextSearchScore(parseSelectorName(tokens, typeSystem));
            tokens.consume(")");
        } else if (tokens.canConsume("DEPTH", "(")) {
            if (tokens.canConsume(")")) {
                // No selector name was given, so use the single selector (and return immediately) ...
                if (source instanceof Selector) {
                    return nodeDepth(((Selector)source).name());
                }
                String msg = GraphI18n.functionIsAmbiguous.text("DEPTH()", pos.getLine(), pos.getColumn());
                throw new ParsingException(pos, msg);
            }
            result = nodeDepth(parseSelectorName(tokens, typeSystem));
            tokens.consume(")");
        } else if (tokens.canConsume("PATH", "(")) {
            if (tokens.canConsume(")")) {
                // No selector name was given, so use the single selector (and return immediately) ...
                if (source instanceof Selector) {
                    return nodePath(((Selector)source).name());
                }
                String msg = GraphI18n.functionIsAmbiguous.text("PATH()", pos.getLine(), pos.getColumn());
                throw new ParsingException(pos, msg);
            }
            result = nodePath(parseSelectorName(tokens, typeSystem));
            tokens.consume(")");
        } else if (tokens.canConsume("REFERENCE", "(")) {
            // The closing parenthesis is not consumed here; that is left to parseReferenceValue ...
            result = parseReferenceValue(tokens, typeSystem, source);
        } else {
            result = parsePropertyValue(tokens, typeSystem, source);
        }

        // Is this operand followed by an arithmetic operation ...
        ArithmeticOperator arithmeticOperator = null;
        if (tokens.canConsume('+')) {
            arithmeticOperator = ArithmeticOperator.ADD;
        } else if (tokens.canConsume('-')) {
            arithmeticOperator = ArithmeticOperator.SUBTRACT;
        } else if (tokens.canConsume('*')) {
            arithmeticOperator = ArithmeticOperator.MULTIPLY;
        } else if (tokens.canConsume('/')) {
            arithmeticOperator = ArithmeticOperator.DIVIDE;
        }
        if (arithmeticOperator != null) {
            if (tokens.matches('(')) {
                // Don't use precedence, but instead use the next DynamicOperand as the RHS ...
                DynamicOperand right = parseDynamicOperand(tokens, typeSystem, source);
                result = arithmeticOperand(result, arithmeticOperator, right);
            } else {
                // There is no parenthesis, so use operator precedence ...
                DynamicOperand right = parseDynamicOperand(tokens, typeSystem, source);
                if (right instanceof ArithmeticOperand) {
                    // But the RHS is an arithmetic operand, so we need to use operator precedence ...
                    ArithmeticOperand arithRhs = (ArithmeticOperand)right;
                    ArithmeticOperator rhsOperator = arithRhs.operator();
                    if (arithmeticOperator.precedes(rhsOperator)) {
                        // This operand's operator does take precedence, so this must be computed before working with the RHS ...
                        // That is, 'result op (left rhsOp right)' is regrouped as '(result op left) rhsOp right' ...
                        DynamicOperand newRhs = arithRhs.right();
                        DynamicOperand newLhs = new ArithmeticOperand(result, arithmeticOperator, arithRhs.left());
                        result = arithmeticOperand(newLhs, rhsOperator, newRhs);
                    } else {
                        result = arithmeticOperand(result, arithmeticOperator, right);
                    }
                } else {
                    // The RHS is just another DynamicOperand ...
                    result = arithmeticOperand(result, arithmeticOperator, right);
                }
            }
        }
        return result;
    }
1191 
1192     protected PropertyValue parsePropertyValue( TokenStream tokens,
1193                                                 TypeSystem typeSystem,
1194                                                 Source source ) {
1195         Position pos = tokens.nextPosition();
1196         String firstWord = parseName(tokens, typeSystem);
1197         SelectorName selectorName = null;
1198         if (tokens.canConsume('.')) {
1199             // We actually read the selector name, so now read the property name ...
1200             selectorName = new SelectorName(firstWord);
1201             String propertyName = parseName(tokens, typeSystem);
1202             return propertyValue(selectorName, propertyName);
1203         }
1204         // Otherwise the source should be a single named selector
1205         if (source instanceof Selector) {
1206             selectorName = ((Selector)source).aliasOrName();
1207             return propertyValue(selectorName, firstWord);
1208         }
1209         String msg = GraphI18n.mustBeScopedAtLineAndColumn.text(firstWord, pos.getLine(), pos.getColumn());
1210         throw new ParsingException(pos, msg);
1211     }
1212 
1213     protected ReferenceValue parseReferenceValue( TokenStream tokens,
1214                                                   TypeSystem typeSystem,
1215                                                   Source source ) {
1216         Position pos = tokens.nextPosition();
1217         SelectorName selectorName = null;
1218         if (tokens.canConsume(')')) {
1219             // There should be a single source ...
1220             if (source instanceof Selector) {
1221                 selectorName = ((Selector)source).aliasOrName();
1222                 return referenceValue(selectorName);
1223             }
1224             String msg = GraphI18n.functionIsAmbiguous.text("REFERENCE()", pos.getLine(), pos.getColumn());
1225             throw new ParsingException(pos, msg);
1226         }
1227         // Otherwise, there is at least one word inside the parentheses ...
1228         String firstWord = parseName(tokens, typeSystem);
1229         if (tokens.canConsume('.')) {
1230             // We actually read the selector name, so now read the property name ...
1231             selectorName = new SelectorName(firstWord);
1232             String propertyName = parseName(tokens, typeSystem);
1233             return referenceValue(selectorName, propertyName);
1234         }
1235         tokens.consume(")");
1236         // The name may be a selector name, or it may be a property name on the default selector.
1237         // If there is just a single selector ...
1238         if (source instanceof Selector) {
1239             Selector selector = (Selector)source;
1240             // and the selector name matches ...
1241             selectorName = new SelectorName(firstWord);
1242             if (selectorName.equals(selector.name()) || (selector.hasAlias() && selectorName.equals(selector.alias()))) {
1243                 // This is a reference value with just the selector name ...
1244                 return referenceValue(selectorName);
1245             }
1246             // Otherwise, the reference value is just the property name ...
1247             return referenceValue(selector.aliasOrName(), firstWord);
1248         }
1249         // Otherwise, the first word is the name of a selector ...
1250         selectorName = new SelectorName(firstWord);
1251         return referenceValue(selectorName);
1252     }
1253 
1254     protected Limit parseLimit( TokenStream tokens ) {
1255         if (tokens.canConsume("LIMIT")) {
1256             int first = tokens.consumeInteger();
1257             if (tokens.canConsume(',')) {
1258                 // This is of the 'from,to' style ...
1259                 int to = tokens.consumeInteger();
1260                 int offset = to - first;
1261                 if (offset < 0) {
1262                     Position pos = tokens.previousPosition();
1263                     String msg = GraphI18n.secondValueInLimitRangeCannotBeLessThanFirst.text(first,
1264                                                                                              to,
1265                                                                                              pos.getLine(),
1266                                                                                              pos.getColumn());
1267                     throw new ParsingException(pos, msg);
1268                 }
1269                 return limit(offset, first);
1270             }
1271             if (tokens.canConsume("OFFSET")) {
1272                 int offset = tokens.consumeInteger();
1273                 return limit(first, offset);
1274             }
1275             // No offset
1276             return limit(first, 0);
1277         }
1278         return null;
1279     }
1280 
1281     /**
1282      * Remove all leading and trailing single-quotes, double-quotes, or square brackets from the supplied text. If multiple,
1283      * properly-paired quotes or brackets are found, they will all be removed.
1284      * 
1285      * @param text the input text; may not be null
1286      * @return the text without leading and trailing brackets and quotes, or <code>text</code> if there were no square brackets or
1287      *         quotes
1288      */
1289     protected String removeBracketsAndQuotes( String text ) {
1290         return removeBracketsAndQuotes(text, true);
1291     }
1292 
1293     /**
1294      * Remove any leading and trailing single-quotes, double-quotes, or square brackets from the supplied text.
1295      * 
1296      * @param text the input text; may not be null
1297      * @param recursive true if more than one pair of quotes, double-quotes, or square brackets should be removed, or false if
1298      *        just the first pair should be removed
1299      * @return the text without leading and trailing brackets and quotes, or <code>text</code> if there were no square brackets or
1300      *         quotes
1301      */
1302     protected String removeBracketsAndQuotes( String text,
1303                                               boolean recursive ) {
1304         if (text.length() > 0) {
1305             char firstChar = text.charAt(0);
1306             switch (firstChar) {
1307                 case '\'':
1308                 case '"':
1309                     assert text.charAt(text.length() - 1) == firstChar;
1310                     String removed = text.substring(1, text.length() - 1);
1311                     return recursive ? removeBracketsAndQuotes(removed, recursive) : removed;
1312                 case '[':
1313                     assert text.charAt(text.length() - 1) == ']';
1314                     removed = text.substring(1, text.length() - 1);
1315                     return recursive ? removeBracketsAndQuotes(removed, recursive) : removed;
1316             }
1317         }
1318         return text;
1319     }
1320 
1321     protected NamedSelector parseNamedSelector( TokenStream tokens,
1322                                                 TypeSystem typeSystem ) {
1323         SelectorName name = parseSelectorName(tokens, typeSystem);
1324         SelectorName alias = null;
1325         if (tokens.canConsume("AS")) alias = parseSelectorName(tokens, typeSystem);
1326         return new NamedSelector(name, alias);
1327     }
1328 
1329     protected SelectorName parseSelectorName( TokenStream tokens,
1330                                               TypeSystem typeSystem ) {
1331         return new SelectorName(parseName(tokens, typeSystem));
1332     }
1333 
1334     protected String parsePath( TokenStream tokens,
1335                                 TypeSystem typeSystem ) {
1336         return removeBracketsAndQuotes(tokens.consume());
1337     }
1338 
1339     protected String parseName( TokenStream tokens,
1340                                 TypeSystem typeSystem ) {
1341         return removeBracketsAndQuotes(tokens.consume());
1342     }
1343 
1344     protected String parseName( String token,
1345                                 TypeSystem typeSystem ) {
1346         return removeBracketsAndQuotes(token);
1347     }
1348 
1349     protected Query query( Source source,
1350                            Constraint constraint,
1351                            List<? extends Ordering> orderings,
1352                            List<? extends Column> columns,
1353                            Limit limit,
1354                            boolean distinct ) {
1355         return new Query(source, constraint, orderings, columns, limit, distinct);
1356     }
1357 
1358     protected SetQuery setQuery( QueryCommand leftQuery,
1359                                  Operation operation,
1360                                  QueryCommand rightQuery,
1361                                  boolean all ) {
1362         return new SetQuery(leftQuery, operation, rightQuery, all);
1363     }
1364 
1365     protected Length length( PropertyValue propertyValue ) {
1366         return new Length(propertyValue);
1367     }
1368 
1369     protected LowerCase lowerCase( DynamicOperand operand ) {
1370         return new LowerCase(operand);
1371     }
1372 
1373     protected UpperCase upperCase( DynamicOperand operand ) {
1374         return new UpperCase(operand);
1375     }
1376 
1377     protected NodeName nodeName( SelectorName selector ) {
1378         return new NodeName(selector);
1379     }
1380 
1381     protected NodeLocalName nodeLocalName( SelectorName selector ) {
1382         return new NodeLocalName(selector);
1383     }
1384 
1385     protected NodeDepth nodeDepth( SelectorName selector ) {
1386         return new NodeDepth(selector);
1387     }
1388 
1389     protected NodePath nodePath( SelectorName selector ) {
1390         return new NodePath(selector);
1391     }
1392 
1393     protected EquiJoinCondition equiJoinCondition( SelectorName selector1,
1394                                                    String property1,
1395                                                    SelectorName selector2,
1396                                                    String property2 ) {
1397         return new EquiJoinCondition(selector1, property1, selector2, property2);
1398     }
1399 
1400     protected DescendantNodeJoinCondition descendantNodeJoinCondition( SelectorName ancestor,
1401                                                                        SelectorName descendant ) {
1402         return new DescendantNodeJoinCondition(ancestor, descendant);
1403     }
1404 
1405     protected ChildNodeJoinCondition childNodeJoinCondition( SelectorName parent,
1406                                                              SelectorName child ) {
1407         return new ChildNodeJoinCondition(parent, child);
1408     }
1409 
1410     protected SameNodeJoinCondition sameNodeJoinCondition( SelectorName selector1,
1411                                                            SelectorName selector2 ) {
1412         return new SameNodeJoinCondition(selector1, selector2);
1413     }
1414 
1415     protected SameNodeJoinCondition sameNodeJoinCondition( SelectorName selector1,
1416                                                            SelectorName selector2,
1417                                                            String path ) {
1418         return new SameNodeJoinCondition(selector1, selector2, path);
1419     }
1420 
1421     protected Limit limit( int rowCount,
1422                            int offset ) {
1423         return new Limit(rowCount, offset);
1424     }
1425 
1426     protected Column column( SelectorName selectorName,
1427                              String propertyName,
1428                              String columnName ) {
1429         return new Column(selectorName, propertyName, columnName);
1430     }
1431 
1432     protected Join join( Source left,
1433                          JoinType joinType,
1434                          Source right,
1435                          JoinCondition joinCondition ) {
1436         return new Join(left, joinType, right, joinCondition);
1437     }
1438 
1439     protected Not not( Constraint constraint ) {
1440         return new Not(constraint);
1441     }
1442 
1443     protected And and( Constraint constraint1,
1444                        Constraint constraint2 ) {
1445         return new And(constraint1, constraint2);
1446     }
1447 
1448     protected Or or( Constraint constraint1,
1449                      Constraint constraint2 ) {
1450         return new Or(constraint1, constraint2);
1451     }
1452 
1453     protected Between between( DynamicOperand operand,
1454                                StaticOperand lowerBound,
1455                                StaticOperand upperBound,
1456                                boolean lowerInclusive,
1457                                boolean upperInclusive ) {
1458         return new Between(operand, lowerBound, upperBound, lowerInclusive, upperInclusive);
1459     }
1460 
1461     protected SetCriteria setCriteria( DynamicOperand operand,
1462                                        Collection<? extends StaticOperand> values ) {
1463         return new SetCriteria(operand, values);
1464     }
1465 
1466     protected FullTextSearch fullTextSearch( SelectorName name,
1467                                              String propertyName,
1468                                              String expression,
1469                                              Term term ) {
1470         return new FullTextSearch(name, propertyName, expression, term);
1471     }
1472 
1473     protected SameNode sameNode( SelectorName name,
1474                                  String path ) {
1475         return new SameNode(name, path);
1476     }
1477 
1478     protected ChildNode childNode( SelectorName name,
1479                                    String path ) {
1480         return new ChildNode(name, path);
1481     }
1482 
1483     protected DescendantNode descendantNode( SelectorName name,
1484                                              String path ) {
1485         return new DescendantNode(name, path);
1486     }
1487 
1488     protected Comparison comparison( DynamicOperand left,
1489                                      Operator operator,
1490                                      StaticOperand right ) {
1491         return new Comparison(left, operator, right);
1492     }
1493 
1494     protected Ordering ordering( DynamicOperand operand,
1495                                  Order order ) {
1496         return new Ordering(operand, order);
1497     }
1498 
1499     protected PropertyExistence propertyExistence( SelectorName selector,
1500                                                    String propertyName ) {
1501         return new PropertyExistence(selector, propertyName);
1502     }
1503 
1504     protected FullTextSearchScore fullTextSearchScore( SelectorName selector ) {
1505         return new FullTextSearchScore(selector);
1506     }
1507 
1508     protected ArithmeticOperand arithmeticOperand( DynamicOperand leftOperand,
1509                                                    ArithmeticOperator operator,
1510                                                    DynamicOperand rightOperand ) {
1511         return new ArithmeticOperand(leftOperand, operator, rightOperand);
1512     }
1513 
1514     protected PropertyValue propertyValue( SelectorName selector,
1515                                            String propertyName ) {
1516         return new PropertyValue(selector, propertyName);
1517     }
1518 
1519     protected ReferenceValue referenceValue( SelectorName selector ) {
1520         return new ReferenceValue(selector);
1521     }
1522 
1523     protected ReferenceValue referenceValue( SelectorName selector,
1524                                              String propertyName ) {
1525         return new ReferenceValue(selector, propertyName);
1526     }
1527 
1528     protected BindVariableName bindVariableName( String variableName ) {
1529         return new BindVariableName(variableName);
1530     }
1531 
1532     protected Literal literal( TypeSystem typeSystem,
1533                                Object value ) throws ValueFormatException {
1534         return new Literal(value);
1535     }
1536 
1537     /**
1538      * A {@link TokenStream.Tokenizer} implementation that parses words, quoted phrases, comments, and symbols. Words are
1539      * delimited by whitespace and consist only of alpha-number characters plus the underscore character. Quoted phrases are
1540      * delimited by single-quote and double-quote characters (which may be escaped within the quote). Comments are the characters
1541      * starting with '/*' and ending with '&#42;/', or starting with '--' and ending with the next line terminator (or the end of
1542      * the content).
1543      */
1544     public static class SqlTokenizer implements TokenStream.Tokenizer {
1545         /**
1546          * The token type for tokens that represent an unquoted string containing a character sequence made up of non-whitespace
1547          * and non-symbol characters.
1548          */
1549         public static final int WORD = 1;
1550         /**
1551          * The token type for tokens that consist of an individual "symbol" character. The set of characters includes:
1552          * <code>[]<>=-+(),</code>
1553          */
1554         public static final int SYMBOL = 2;
1555         /**
1556          * The token type for tokens that consist of other characters.
1557          */
1558         public static final int OTHER = 3;
1559         /**
1560          * The token type for tokens that consist of all the characters within single-quotes, double-quotes, or square brackets.
1561          */
1562         public static final int QUOTED_STRING = 4;
1563         /**
1564          * The token type for tokens that consist of all the characters between "/*" and "&#42;/" or between "--" and the next
1565          * line terminator (e.g., '\n', '\r' or "\r\n")
1566          */
1567         public static final int COMMENT = 6;
1568 
1569         private final boolean useComments;
1570 
1571         public SqlTokenizer( boolean useComments ) {
1572             this.useComments = useComments;
1573         }
1574 
1575         /**
1576          * {@inheritDoc}
1577          * 
1578          * @see org.modeshape.common.text.TokenStream.Tokenizer#tokenize(CharacterStream, Tokens)
1579          */
1580         public void tokenize( CharacterStream input,
1581                               Tokens tokens ) throws ParsingException {
1582             while (input.hasNext()) {
1583                 char c = input.next();
1584                 switch (c) {
1585                     case ' ':
1586                     case '\t':
1587                     case '\n':
1588                     case '\r':
1589                         // Just skip these whitespace characters ...
1590                         break;
1591                     case '(':
1592                     case ')':
1593                     case '{':
1594                     case '}':
1595                     case '*':
1596                     case '.':
1597                     case ',':
1598                     case ';':
1599                     case '+':
1600                     case '%':
1601                     case '?':
1602                     case '$':
1603                     case ']':
1604                     case '!':
1605                     case '<':
1606                     case '>':
1607                     case '|':
1608                     case '=':
1609                     case ':':
1610                         tokens.addToken(input.position(input.index()), input.index(), input.index() + 1, SYMBOL);
1611                         break;
1612                     case '\'':
1613                     case '[':
1614                     case '\"':
1615                         int startIndex = input.index();
1616                         char closingChar = c == '[' ? ']' : c;
1617                         Position pos = input.position(startIndex);
1618                         boolean foundClosingQuote = false;
1619                         while (input.hasNext()) {
1620                             c = input.next();
1621                             if (c == '\\' && input.isNext(closingChar)) {
1622                                 c = input.next(); // consume the closingChar since it is escaped
1623                             } else if (c == closingChar) {
1624                                 foundClosingQuote = true;
1625                                 break;
1626                             }
1627                         }
1628                         if (!foundClosingQuote) {
1629                             String msg = CommonI18n.noMatchingDoubleQuoteFound.text(pos.getLine(), pos.getColumn());
1630                             if (closingChar == '\'') {
1631                                 msg = CommonI18n.noMatchingSingleQuoteFound.text(pos.getLine(), pos.getColumn());
1632                             } else if (closingChar == ']') {
1633                                 msg = GraphI18n.noMatchingBracketFound.text(pos.getLine(), pos.getColumn());
1634                             }
1635                             throw new ParsingException(pos, msg);
1636                         }
1637                         int endIndex = input.index() + 1; // beyond last character read
1638                         tokens.addToken(pos, startIndex, endIndex, QUOTED_STRING);
1639                         break;
1640                     case '-':
1641                         startIndex = input.index();
1642                         pos = input.position(input.index());
1643                         if (input.isNext('-')) {
1644                             // End-of-line comment ...
1645                             boolean foundLineTerminator = false;
1646                             while (input.hasNext()) {
1647                                 c = input.next();
1648                                 if (c == '\n' || c == '\r') {
1649                                     foundLineTerminator = true;
1650                                     break;
1651                                 }
1652                             }
1653                             endIndex = input.index(); // the token won't include the '\n' or '\r' character(s)
1654                             if (!foundLineTerminator) ++endIndex; // must point beyond last char
1655                             if (c == '\r' && input.isNext('\n')) input.next();
1656                             if (useComments) {
1657                                 tokens.addToken(pos, startIndex, endIndex, COMMENT);
1658                             }
1659                         } else {
1660                             tokens.addToken(input.position(input.index()), input.index(), input.index() + 1, SYMBOL);
1661                             break;
1662                         }
1663                         break;
1664                     case '/':
1665                         startIndex = input.index();
1666                         pos = input.position(input.index());
1667                         if (input.isNext('*')) {
1668                             // Multi-line comment ...
1669                             while (input.hasNext() && !input.isNext('*', '/')) {
1670                                 c = input.next();
1671                             }
1672                             if (input.hasNext()) input.next(); // consume the '*'
1673                             if (input.hasNext()) input.next(); // consume the '/'
1674                             if (useComments) {
1675                                 endIndex = input.index() + 1; // the token will include the quote characters
1676                                 tokens.addToken(pos, startIndex, endIndex, COMMENT);
1677                             }
1678                         } else {
1679                             tokens.addToken(input.position(input.index()), input.index(), input.index() + 1, SYMBOL);
1680                             break;
1681                         }
1682                         break;
1683                     default:
1684                         startIndex = input.index();
1685                         pos = input.position(input.index());
1686                         // Read as long as there is a valid XML character ...
1687                         int tokenType = (Character.isLetterOrDigit(c) || c == '_') ? WORD : OTHER;
1688                         while (input.isNextLetterOrDigit() || input.isNext('_')) {
1689                             c = input.next();
1690                         }
1691                         endIndex = input.index() + 1; // beyond last character that was included
1692                         tokens.addToken(pos, startIndex, endIndex, tokenType);
1693                 }
1694             }
1695         }
1696     }
1697 }