View Javadoc

1   /*
2    * ModeShape (http://www.modeshape.org)
3    * See the COPYRIGHT.txt file distributed with this work for information
4    * regarding copyright ownership.  Some portions may be licensed
5    * to Red Hat, Inc. under one or more contributor license agreements.
6    * See the AUTHORS.txt file in the distribution for a full listing of 
7    * individual contributors.
8    *
9    * ModeShape is free software. Unless otherwise indicated, all code in ModeShape
10   * is licensed to you under the terms of the GNU Lesser General Public License as
11   * published by the Free Software Foundation; either version 2.1 of
12   * the License, or (at your option) any later version.
13   * 
14   * ModeShape is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17   * Lesser General Public License for more details.
18   *
19   * You should have received a copy of the GNU Lesser General Public
20   * License along with this software; if not, write to the Free
21   * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
22   * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
23   */
24  package org.modeshape.graph.query.parse;
25  
26  import static org.modeshape.common.text.TokenStream.ANY_VALUE;
27  import java.util.ArrayList;
28  import java.util.Collection;
29  import java.util.Collections;
30  import java.util.List;
31  import java.util.concurrent.atomic.AtomicBoolean;
32  import org.modeshape.common.CommonI18n;
33  import org.modeshape.common.text.ParsingException;
34  import org.modeshape.common.text.Position;
35  import org.modeshape.common.text.TokenStream;
36  import org.modeshape.common.text.TokenStream.CharacterStream;
37  import org.modeshape.common.text.TokenStream.Tokenizer;
38  import org.modeshape.common.text.TokenStream.Tokens;
39  import org.modeshape.common.xml.XmlCharacters;
40  import org.modeshape.graph.GraphI18n;
41  import org.modeshape.graph.property.ValueFormatException;
42  import org.modeshape.graph.query.model.And;
43  import org.modeshape.graph.query.model.ArithmeticOperand;
44  import org.modeshape.graph.query.model.ArithmeticOperator;
45  import org.modeshape.graph.query.model.Between;
46  import org.modeshape.graph.query.model.BindVariableName;
47  import org.modeshape.graph.query.model.ChildNode;
48  import org.modeshape.graph.query.model.ChildNodeJoinCondition;
49  import org.modeshape.graph.query.model.Column;
50  import org.modeshape.graph.query.model.Comparison;
51  import org.modeshape.graph.query.model.Constraint;
52  import org.modeshape.graph.query.model.DescendantNode;
53  import org.modeshape.graph.query.model.DescendantNodeJoinCondition;
54  import org.modeshape.graph.query.model.DynamicOperand;
55  import org.modeshape.graph.query.model.EquiJoinCondition;
56  import org.modeshape.graph.query.model.FullTextSearch;
57  import org.modeshape.graph.query.model.FullTextSearchScore;
58  import org.modeshape.graph.query.model.Join;
59  import org.modeshape.graph.query.model.JoinCondition;
60  import org.modeshape.graph.query.model.JoinType;
61  import org.modeshape.graph.query.model.Length;
62  import org.modeshape.graph.query.model.Limit;
63  import org.modeshape.graph.query.model.Literal;
64  import org.modeshape.graph.query.model.LowerCase;
65  import org.modeshape.graph.query.model.NamedSelector;
66  import org.modeshape.graph.query.model.NodeDepth;
67  import org.modeshape.graph.query.model.NodeLocalName;
68  import org.modeshape.graph.query.model.NodeName;
69  import org.modeshape.graph.query.model.NodePath;
70  import org.modeshape.graph.query.model.Not;
71  import org.modeshape.graph.query.model.Operator;
72  import org.modeshape.graph.query.model.Or;
73  import org.modeshape.graph.query.model.Order;
74  import org.modeshape.graph.query.model.Ordering;
75  import org.modeshape.graph.query.model.PropertyExistence;
76  import org.modeshape.graph.query.model.PropertyValue;
77  import org.modeshape.graph.query.model.Query;
78  import org.modeshape.graph.query.model.QueryCommand;
79  import org.modeshape.graph.query.model.ReferenceValue;
80  import org.modeshape.graph.query.model.SameNode;
81  import org.modeshape.graph.query.model.SameNodeJoinCondition;
82  import org.modeshape.graph.query.model.Selector;
83  import org.modeshape.graph.query.model.SelectorName;
84  import org.modeshape.graph.query.model.SetCriteria;
85  import org.modeshape.graph.query.model.SetQuery;
86  import org.modeshape.graph.query.model.Source;
87  import org.modeshape.graph.query.model.StaticOperand;
88  import org.modeshape.graph.query.model.TypeSystem;
89  import org.modeshape.graph.query.model.UpperCase;
90  import org.modeshape.graph.query.model.FullTextSearch.Term;
91  import org.modeshape.graph.query.model.SetQuery.Operation;
92  import org.modeshape.graph.query.model.TypeSystem.TypeFactory;
93  
94  /**
95   * A {@link QueryParser} implementation that parses a subset of SQL select and set queries.
96   * <p>
97   * This grammar is equivalent to the SQL grammar as defined by the JCR 2.0 specification, with some useful additions:
98   * <ul>
99   * <li>"<code>... (UNION|INTERSECT|EXCEPT) [ALL] ...</code>" to combine and merge results from multiple queries</li>
100  * <li>"<code>SELECT DISTINCT ...</code>" to remove duplicates</li>
101  * <li>"<code>LIMIT count [OFFSET number]</code>" clauses to control the number of results returned as well as the number of rows
102  * that should be skipped</li>
103  * <li>Support for additional join types, including "<code>FULL OUTER JOIN</code>" and "<code>CROSS JOIN</code>"</li>
104  * <li>Additional dynamic operands "<code>DEPTH([&lt;selectorName>])</code>" and "<code>PATH([&lt;selectorName>])</code>" that
105  * enables placing constraints on the node depth and path, respectively, and which can be used in a manner similar to "
106  * <code>NAME([&lt;selectorName>])</code>" and "<code>LOCALNAME([&lt;selectorName>])</code>". Note in each of these cases, the
107  * selector name is optional if there is only one selector in the query.</li>
108  * <li>Additional dynamic operand "<code>REFERENCE([[&lt;selectorName>.]&lt;propertyName>])</code>" that
109  * enables placing constraints on one or all reference properties, and which can be used in a manner similar to "
110  * <code>PropertyValue([&lt;selectorName>.]&lt;propertyName>)</code>". Note in each of these cases, the
111  * selector name is optional if there is only one selector in the query, and that the property name can be excluded
112  * if the constraint should apply to all reference properties.</li>
113  * <li>Support for the IN clause and NOT IN clause to more easily supply a list of valid discrete static operands: "
114  * <code>&lt;dynamicOperand> [NOT] IN (&lt;staticOperand> {, &lt;staticOperand>})</code>"</li>
115  * <li>Support for the BETWEEN clause: "<code>&lt;dynamicOperand> [NOT] BETWEEN &lt;lowerBoundStaticOperand> [EXCLUSIVE] AND
116  * &lt;upperBoundStaticOperand> [EXCLUSIVE]</code>"</li>
117  * <li>Support for arithmetic operations ('+', '-', '*', '/') between dynamic operands used in <code>WHERE</code> criteria and <code>ORDER BY</code>
118  * clauses: "<code>WHERE &lt;dynamicOperand> + &lt;dynamicOperand> ...</code>" or "<code>ORDER BY (&lt;dynamicOperand> + &lt;dynamicOperand>) [ASC]</code>".
119  * Note that standard operator precedence is used, but grouping by (potentially nested) parentheses is also supported.</li>
120  * </ul>
121  * </p>
122  * <h3>SQL grammar</h3>
123  * <p>
124  * This section defines the complete grammar for the SQL dialect supported by this parser.
125  * </p>
126  * <h4>Queries</h4>
127  * 
128  * <pre>
129  * QueryCommand ::= Query | SetQuery
130  * 
131  * SetQuery ::= Query ('UNION'|'INTERSECT'|'EXCEPT') [ALL] Query
132  *                  { ('UNION'|'INTERSECT'|'EXCEPT') [ALL] Query }
133  * 
134  * Query ::= 'SELECT' ['DISTINCT'] columns
135  *           'FROM' Source
136  *           ['WHERE' Constraint]
137  *           ['ORDER BY' orderings]
138  *           [Limit]
139  * </pre>
140  * 
141  * <h4>Sources</h4>
142  * 
143  * <pre>
144  * Source ::= Selector | Join
145  * 
146  * Selector ::= nodeTypeName ['AS' selectorName]
147  * 
148  * nodeTypeName ::= Name
149  * </pre>
150  * 
151  * <h4>Joins</h4>
152  * 
153  * <pre>
154  * Join ::= left [JoinType] 'JOIN' right 'ON' JoinCondition
155  *          // If JoinType is omitted INNER is assumed.
156  *          
157  * left ::= Source
158  * right ::= Source
159  * 
160  * JoinType ::= Inner | LeftOuter | RightOuter | FullOuter | Cross
161  * 
162  * Inner ::= 'INNER' ['JOIN']
163  * 
164  * LeftOuter ::= 'LEFT JOIN' | 'OUTER JOIN' | 'LEFT OUTER JOIN'
165  * 
166  * RightOuter ::= 'RIGHT OUTER' ['JOIN']
167  * 
168  * FullOuter ::= 'FULL OUTER' ['JOIN']
169  * 
170  * Cross ::= 'CROSS' ['JOIN']
171  * 
172  * JoinCondition ::= EquiJoinCondition | SameNodeJoinCondition | ChildNodeJoinCondition | DescendantNodeJoinCondition
173  * </pre>
174  * 
175  * <h5>Equi-join conditions</h5>
176  * 
177  * <pre>
178  * EquiJoinCondition ::= selector1Name'.'property1Name '=' selector2Name'.'property2Name
179  * 
180  * selector1Name ::= selectorName
181  * selector2Name ::= selectorName
182  * property1Name ::= propertyName
183  * property2Name ::= propertyName
184  * </pre>
185  * 
186  * <h5>Same-node join condition</h5>
187  * 
188  * <pre>
189  * SameNodeJoinCondition ::= 'ISSAMENODE(' selector1Name ',' selector2Name [',' selector2Path] ')'
190  * 
191  * selector2Path ::= Path
192  * </pre>
193  * 
194  * <h5>Child-node join condition</h5>
195  * 
196  * <pre>
197  * ChildNodeJoinCondition ::= 'ISCHILDNODE(' childSelectorName ',' parentSelectorName ')'
198  * 
199  * childSelectorName ::= selectorName
200  * parentSelectorName ::= selectorName
201  * </pre>
202  * 
203  * <h5>Descendant-node join condition</h5>
204  * 
205  * <pre>
206  * DescendantNodeJoinCondition ::= 'ISDESCENDANTNODE(' descendantSelectorName ',' ancestorSelectorName ')'
207  * descendantSelectorName ::= selectorName
208  * ancestorSelectorName ::= selectorName
209  * </pre>
210  * 
211  * <h4>Constraints</h4>
212  * 
213  * <pre>
214  * Constraint ::= ConstraintItem | '(' ConstraintItem ')'
215  * 
216  * ConstraintItem ::= And | Or | Not | Comparison | Between | PropertyExistence | SetConstraint | FullTextSearch | 
217  *                    SameNode | ChildNode | DescendantNode
218  * </pre>
219  * 
220  * <h5>And constraint</h5>
221  * 
222  * <pre>
223  * And ::= constraint1 'AND' constraint2
224  * 
225  * constraint1 ::= Constraint
226  * constraint2 ::= Constraint
227  * </pre>
228  * 
229  * <h5>Or constraint</h5>
230  * 
231  * <pre>
232  * Or ::= constraint1 'OR' constraint2
233  * </pre>
234  * 
235  * <h5>Not constraint</h5>
236  * 
237  * <pre>
238  * Not ::= 'NOT' Constraint
239  * </pre>
240  * 
241  * <h5>Comparison constraint</h5>
242  * 
243  * <pre>
244  * Comparison ::= DynamicOperand Operator StaticOperand
245  * 
246  * Operator ::= '=' | '!=' | '<' | '<=' | '>' | '>=' | 'LIKE'
247  * </pre>
248  * 
249  * <h5>Between constraint</h5>
250  * 
251  * <pre>
252  * Between ::= DynamicOperand ['NOT'] 'BETWEEN' lowerBound ['EXCLUSIVE'] 'AND' upperBound ['EXCLUSIVE']
253  * 
254  * lowerBound ::= StaticOperand
255  * upperBound ::= StaticOperand
256  * </pre>
257  * 
258  * <h5>Property existence constraint</h5>
259  * 
260  * <pre>
261  * PropertyExistence ::= selectorName'.'propertyName 'IS' ['NOT'] 'NULL' | 
262  *                       propertyName 'IS' ['NOT'] 'NULL' &#47;* If only one selector exists in this query *&#47;
263  * 
264  * </pre>
265  * 
266  * <h5>Set constraint</h5>
267  * 
268  * <pre>
269  * SetConstraint ::= selectorName'.'propertyName ['NOT'] 'IN' | 
270  *                       propertyName ['NOT'] 'IN' &#47;* If only one selector exists in this query *&#47;
271  *                       '(' firstStaticOperand {',' additionalStaticOperand } ')'
272  * firstStaticOperand ::= StaticOperand
273  * additionalStaticOperand ::= StaticOperand
274  * </pre>
275  * 
276  * <h5>Full-text search constraint</h5>
277  * 
278  * <pre>
279  * FullTextSearch ::= 'CONTAINS(' ([selectorName'.']propertyName | selectorName'.*') 
280  *                            ',' ''' fullTextSearchExpression''' ')'
281  *                    &#47;* If only one selector exists in this query, explicit specification of the selectorName
282  *                       preceding the propertyName is optional *&#47;
283  * fullTextSearchExpression ::= &#47;* a full-text search expression, see {@link FullTextSearchParser} *&#47;
284  * </pre>
285  * 
286  * <h5>Same-node constraint</h5>
287  * 
288  * <pre>
289  * SameNode ::= 'ISSAMENODE(' [selectorName ','] Path ')' 
290  *                    &#47;* If only one selector exists in this query, explicit specification of the selectorName
291  *                       preceding the propertyName is optional *&#47;
292  * </pre>
293  * 
294  * <h5>Child-node constraint</h5>
295  * 
296  * <pre>
297  * ChildNode ::= 'ISCHILDNODE(' [selectorName ','] Path ')' 
298  *                    &#47;* If only one selector exists in this query, explicit specification of the selectorName
299  *                       preceding the propertyName is optional *&#47;
300  * </pre>
301  * 
302  * <h5>Descendant-node constraint</h5>
303  * 
304  * <pre>
305  * DescendantNode ::= 'ISDESCENDANTNODE(' [selectorName ','] Path ')' 
306  *                    /* If only one selector exists in this query, explicit specification of the selectorName
307  *                       preceding the propertyName is optional *&#47;
308  * </pre>
309  * 
310  * <h5>Paths and names</h5>
311  * 
312  * <pre>
313  * 
314  * Name ::= '[' quotedName ']' | '[' simpleName ']' | simpleName
315  * 
316  * quotedName ::= /* A JCR Name (see the JCR specification) *&#47;
317  * simpleName ::= /* A JCR Name that contains only SQL-legal characters (namely letters, digits, and underscore) *&#47;
318  *
319  * Path ::= '[' quotedPath ']' | '[' simplePath ']' | simplePath
320  *
321  * quotedPath ::= /* A JCR Path that contains non-SQL-legal characters *&#47;
322  * simplePath ::= /* A JCR Path (rather Name) that contains only SQL-legal characters (namely letters, digits, and underscore) *&#47;
323  * </pre>
324  * 
325  * <h4>Static operands</h4>
326  * 
327  * <pre>
328  * StaticOperand ::= Literal | BindVariableValue
329  * </pre>
330  * 
331  * <h5>Literal</h5>
332  * 
333  * <pre>
334  * Literal ::= CastLiteral | UncastLiteral
335  * 
336  * CastLiteral ::= 'CAST(' UncastLiteral ' AS ' PropertyType ')'
337  * 
338  * PropertyType ::= 'STRING' | 'BINARY' | 'DATE' | 'LONG' | 'DOUBLE' | 'DECIMAL' | 'BOOLEAN' | 'NAME' | 'PATH' | 
339  *                  'REFERENCE' | 'WEAKREFERENCE' | 'URI'
340  *                  /* 'WEAKREFERENCE' is not currently supported in JCR 1.0 *&#47;
341  *                  
342  * UncastLiteral ::= UnquotedLiteral | ''' UnquotedLiteral ''' | '"' UnquotedLiteral '"'
343  * 
344  * UnquotedLiteral ::= /* String form of a JCR Value, as defined in the JCR specification *&#47;
345  * </pre>
346  * 
347  * <h5>Bind variables</h5>
348  * 
349  * <pre>
350  * BindVariableValue ::= '$'bindVariableName
351  * 
352  * bindVariableName ::= /* A string that conforms to the JCR Name syntax, though the prefix does not need to be
353  *                         a registered namespace prefix. *&#47;
354  * </pre>
355  * 
356  * <h4>Dynamic operands</h4>
357  * 
358  * <pre>
359  * DynamicOperand ::= PropertyValue | ReferenceValue | Length | NodeName | NodeLocalName | NodePath | NodeDepth | 
360  *                    FullTextSearchScore | LowerCase | UpperCase | Arithmetic |
361  *                    '(' DynamicOperand ')'
362  * </pre>
363  * <h5>Property value</h5>
364  * <pre>
365  * PropertyValue ::= [selectorName'.'] propertyName
366  *                    /* If only one selector exists in this query, explicit specification of the selectorName
367  *                       preceding the propertyName is optional *&#47;
368  * </pre>
369  * <h5>Reference value</h5>
370  * <pre>
371  * ReferenceValue ::= 'REFERENCE(' selectorName '.' propertyName ')' |
372  *                    'REFERENCE(' selectorName ')' |
373  *                    'REFERENCE()' |
374  *                    /* If only one selector exists in this query, explicit specification of the selectorName
375  *                       preceding the propertyName is optional. Also, the property name may be excluded 
376  *                       if the constraint should apply to any reference property. *&#47;
377  * </pre>
378  * <h5>Property length</h5>
379  * <pre>
380  * Length ::= 'LENGTH(' PropertyValue ')'
381  * </pre>
382  * <h5>Node name</h5>
383  * <pre>
384  * NodeName ::= 'NAME(' [selectorName] ')'
385  *                    /* If only one selector exists in this query, explicit specification of the selectorName
386  *                       is optional *&#47;
387  * </pre>
388  * <h5>Node local name</h5>
389  * <pre>
390  * NodeLocalName ::= 'LOCALNAME(' [selectorName] ')'
391  *                    /* If only one selector exists in this query, explicit specification of the selectorName
392  *                       is optional *&#47;
393  * </pre>
394  * <h5>Node path</h5>
395  * <pre>
396  * NodePath ::= 'PATH(' [selectorName] ')'
397  *                    /* If only one selector exists in this query, explicit specification of the selectorName
398  *                       is optional *&#47;
399  * </pre>
400  * <h5>Node depth</h5>
401  * <pre>
402  * NodeDepth ::= 'DEPTH(' [selectorName] ')'
403  *                    /* If only one selector exists in this query, explicit specification of the selectorName
404  *                       is optional *&#47;
405  * </pre>
406  * <h5>Full-text search score</h5>
407  * <pre>
408  * FullTextSearchScore ::= 'SCORE(' [selectorName] ')'
409  *                    /* If only one selector exists in this query, explicit specification of the selectorName
410  *                       is optional *&#47;
411  * </pre>
412  * <h5>Lowercase</h5>
413  * <pre>
414  * LowerCase ::= 'LOWER(' DynamicOperand ')'
415  * </pre>
416  * <h5>Uppercase</h5>
417  * <pre>
418  * UpperCase ::= 'UPPER(' DynamicOperand ')'
419  * </pre>
420  * <h5>Arithmetic</h5>
421  * <pre>
422  * Arithmetic ::= DynamicOperand ('+'|'-'|'*'|'/') DynamicOperand
423  * </pre>
424  * 
425  * <h4>Ordering</h4>
426  * 
427  * <pre>
428  * orderings ::= Ordering {',' Ordering}
429  * 
430  * Ordering ::= DynamicOperand [Order]
431  * 
432  * Order ::= 'ASC' | 'DESC'
433  * </pre>
434  * 
435  * <h4>Columns</h4>
436  * 
437  * <pre>
438  * columns ::= (Column {',' Column}) | '*'
439  * 
440  * Column ::= ([selectorName'.']propertyName ['AS' columnName]) | (selectorName'.*')
441  *                    /* If only one selector exists in this query, explicit specification of the selectorName
442  *                       preceding the propertyName is optional *&#47;
443  * selectorName ::= Name
444  * propertyName ::= Name
445  * columnName ::= Name
446  * </pre>
447  * 
448  * <h4>Limit</h4>
449  * 
450  * <pre>
451  * Limit ::= 'LIMIT' count [ 'OFFSET' offset ]
452  * count ::= /* Positive integer value *&#47;
453  * offset ::= /* Non-negative integer value *&#47;
454  * </pre>
455  */
456 public class SqlQueryParser implements QueryParser {
457 
458     public static final String LANGUAGE = "SQL";
459 
460     /**
461      * {@inheritDoc}
462      * 
463      * @see org.modeshape.graph.query.parse.QueryParser#getLanguage()
464      */
465     public String getLanguage() {
466         return LANGUAGE;
467     }
468 
469     /**
470      * {@inheritDoc}
471      * 
472      * @see java.lang.Object#toString()
473      */
474     @Override
475     public String toString() {
476         return getLanguage();
477     }
478 
479     /**
480      * {@inheritDoc}
481      * 
482      * @see java.lang.Object#equals(java.lang.Object)
483      */
484     @Override
485     public boolean equals( Object obj ) {
486         if (obj == this) return true;
487         if (obj instanceof QueryParser) {
488             QueryParser that = (QueryParser)obj;
489             return this.getLanguage().equals(that.getLanguage());
490         }
491         return false;
492     }
493 
494     /**
495      * {@inheritDoc}
496      * 
497      * @see org.modeshape.graph.query.parse.QueryParser#parseQuery(String, TypeSystem)
498      */
499     public QueryCommand parseQuery( String query,
500                                     TypeSystem typeSystem ) {
501         Tokenizer tokenizer = new SqlTokenizer(false);
502         TokenStream tokens = new TokenStream(query, tokenizer, false);
503         tokens.start();
504         return parseQueryCommand(tokens, typeSystem);
505     }
506 
507     protected QueryCommand parseQueryCommand( TokenStream tokens,
508                                               TypeSystem typeSystem ) {
509         QueryCommand command = null;
510         if (tokens.matches("SELECT")) {
511             command = parseQuery(tokens, typeSystem);
512             while (tokens.hasNext()) {
513                 if (tokens.matchesAnyOf("UNION", "INTERSECT", "EXCEPT")) {
514                     command = parseSetQuery(tokens, command, typeSystem);
515                 } else {
516                     Position pos = tokens.previousPosition();
517                     String msg = GraphI18n.unexpectedToken.text(tokens.consume(), pos.getLine(), pos.getColumn());
518                     throw new ParsingException(pos, msg);
519                 }
520             }
521         } else {
522             // We expected SELECT ...
523             Position pos = tokens.nextPosition();
524             String msg = GraphI18n.unexpectedToken.text(tokens.consume(), pos.getLine(), pos.getColumn());
525             throw new ParsingException(pos, msg);
526         }
527         return command;
528     }
529 
530     protected Query parseQuery( TokenStream tokens,
531                                 TypeSystem typeSystem ) {
532         AtomicBoolean isDistinct = new AtomicBoolean(false);
533         List<ColumnExpression> columnExpressions = parseSelect(tokens, isDistinct, typeSystem);
534         Source source = parseFrom(tokens, typeSystem);
535         Constraint constraint = parseWhere(tokens, typeSystem, source);
536         // Parse the order by and limit (can be in any order) ...
537         List<Ordering> orderings = parseOrderBy(tokens, typeSystem, source);
538         Limit limit = parseLimit(tokens);
539         if (orderings == null) parseOrderBy(tokens, typeSystem, source);
540 
541         // Convert the column expressions to columns ...
542         List<Column> columns = new ArrayList<Column>(columnExpressions.size());
543         for (ColumnExpression expression : columnExpressions) {
544             SelectorName selectorName = expression.getSelectorName();
545             String propertyName = expression.getPropertyName();
546             if (selectorName == null) {
547                 if (source instanceof Selector) {
548                     selectorName = ((Selector)source).getName();
549                 } else {
550                     Position pos = expression.getPosition();
551                     String msg = GraphI18n.mustBeScopedAtLineAndColumn.text(expression, pos.getLine(), pos.getColumn());
552                     throw new ParsingException(pos, msg);
553                 }
554             }
555             columns.add(new Column(selectorName, propertyName, expression.getColumnName()));
556         }
557         // Now create the query ...
558         return new Query(source, constraint, orderings, columns, limit, isDistinct.get());
559     }
560 
561     protected SetQuery parseSetQuery( TokenStream tokens,
562                                       QueryCommand leftHandSide,
563                                       TypeSystem typeSystem ) {
564         Operation operation = null;
565         if (tokens.canConsume("UNION")) {
566             operation = Operation.UNION;
567         } else if (tokens.canConsume("INTERSECT")) {
568             operation = Operation.INTERSECT;
569         } else {
570             tokens.consume("EXCEPT");
571             operation = Operation.EXCEPT;
572         }
573         boolean all = tokens.canConsume("ALL");
574         // Parse the next select
575         QueryCommand rightQuery = parseQuery(tokens, typeSystem);
576         return new SetQuery(leftHandSide, operation, rightQuery, all);
577     }
578 
579     protected List<ColumnExpression> parseSelect( TokenStream tokens,
580                                                   AtomicBoolean isDistinct,
581                                                   TypeSystem typeSystem ) {
582         tokens.consume("SELECT");
583         if (tokens.canConsume("DISTINCT")) isDistinct.set(true);
584         if (tokens.canConsume('*')) {
585             return Collections.emptyList();
586         }
587         List<ColumnExpression> columns = new ArrayList<ColumnExpression>();
588         do {
589             Position position = tokens.nextPosition();
590             String propertyName = removeBracketsAndQuotes(tokens.consume());
591             SelectorName selectorName = null;
592             if (tokens.canConsume('.')) {
593                 // We actually read the selector name, so now read the property name ...
594                 selectorName = new SelectorName(propertyName);
595                 propertyName = removeBracketsAndQuotes(tokens.consume());
596             }
597             String alias = propertyName;
598             if (tokens.canConsume("AS")) alias = removeBracketsAndQuotes(tokens.consume());
599             columns.add(new ColumnExpression(selectorName, propertyName, alias, position));
600         } while (tokens.canConsume(','));
601         return columns;
602     }
603 
604     protected Source parseFrom( TokenStream tokens,
605                                 TypeSystem typeSystem ) {
606         Source source = null;
607         tokens.consume("FROM");
608         source = parseNamedSelector(tokens);
609         while (tokens.hasNext()) {
610             JoinType joinType = null;
611             if (tokens.canConsume("JOIN") || tokens.canConsume("INNER", "JOIN")) {
612                 joinType = JoinType.INNER;
613             } else if (tokens.canConsume("OUTER", "JOIN") || tokens.canConsume("LEFT", "JOIN")
614                        || tokens.canConsume("LEFT", "OUTER", "JOIN")) {
615                 joinType = JoinType.LEFT_OUTER;
616             } else if (tokens.canConsume("RIGHT", "OUTER", "JOIN") || tokens.canConsume("RIGHT", "OUTER")) {
617                 joinType = JoinType.RIGHT_OUTER;
618             } else if (tokens.canConsume("FULL", "OUTER", "JOIN") || tokens.canConsume("FULL", "OUTER")) {
619                 joinType = JoinType.FULL_OUTER;
620             } else if (tokens.canConsume("CROSS", "JOIN") || tokens.canConsume("CROSS")) {
621                 joinType = JoinType.CROSS;
622             }
623             if (joinType == null) break;
624             // Read the name of the selector on the right side of the join ...
625             NamedSelector right = parseNamedSelector(tokens);
626             // Read the join condition ...
627             JoinCondition joinCondition = parseJoinCondition(tokens, typeSystem);
628             // Create the join ...
629             source = new Join(source, joinType, right, joinCondition);
630         }
631         return source;
632     }
633 
634     protected JoinCondition parseJoinCondition( TokenStream tokens,
635                                                 TypeSystem typeSystem ) {
636         tokens.consume("ON");
637         if (tokens.canConsume("ISSAMENODE", "(")) {
638             SelectorName selector1Name = parseSelectorName(tokens);
639             tokens.consume(',');
640             SelectorName selector2Name = parseSelectorName(tokens);
641             if (tokens.canConsume('.')) {
642                 String path = parsePath(tokens, typeSystem);
643                 tokens.consume(')');
644                 return new SameNodeJoinCondition(selector1Name, selector2Name, path);
645             }
646             tokens.consume(')');
647             return new SameNodeJoinCondition(selector1Name, selector2Name);
648         }
649         if (tokens.canConsume("ISCHILDNODE", "(")) {
650             SelectorName child = parseSelectorName(tokens);
651             tokens.consume(',');
652             SelectorName parent = parseSelectorName(tokens);
653             tokens.consume(')');
654             return new ChildNodeJoinCondition(parent, child);
655         }
656         if (tokens.canConsume("ISDESCENDANTNODE", "(")) {
657             SelectorName descendant = parseSelectorName(tokens);
658             tokens.consume(',');
659             SelectorName ancestor = parseSelectorName(tokens);
660             tokens.consume(')');
661             return new DescendantNodeJoinCondition(ancestor, descendant);
662         }
663         SelectorName selector1 = parseSelectorName(tokens);
664         tokens.consume('.');
665         String property1 = parseName(tokens, typeSystem);
666         tokens.consume('=');
667         SelectorName selector2 = parseSelectorName(tokens);
668         tokens.consume('.');
669         String property2 = parseName(tokens, typeSystem);
670         return new EquiJoinCondition(selector1, property1, selector2, property2);
671     }
672 
673     protected Constraint parseWhere( TokenStream tokens,
674                                      TypeSystem typeSystem,
675                                      Source source ) {
676         if (tokens.canConsume("WHERE")) {
677             return parseConstraint(tokens, typeSystem, source);
678         }
679         return null;
680     }
681 
682     protected Constraint parseConstraint( TokenStream tokens,
683                                           TypeSystem typeSystem,
684                                           Source source ) {
685         Constraint constraint = null;
686         Position pos = tokens.nextPosition();
687         if (tokens.canConsume("(")) {
688             constraint = parseConstraint(tokens, typeSystem, source);
689             tokens.consume(")");
690         } else if (tokens.canConsume("NOT")) {
691             tokens.canConsume('(');
692             constraint = new Not(parseConstraint(tokens, typeSystem, source));
693             tokens.canConsume(')');
694         } else if (tokens.canConsume("CONTAINS", "(")) {
695             // Either 'selectorName.propertyName', or 'selectorName.*' or 'propertyName' ...
696             String first = tokens.consume();
697             SelectorName selectorName = null;
698             String propertyName = null;
699             if (tokens.canConsume(".", "*")) {
700                 selectorName = new SelectorName(removeBracketsAndQuotes(first));
701             } else if (tokens.canConsume('.')) {
702                 selectorName = new SelectorName(removeBracketsAndQuotes(first));
703                 propertyName = parseName(tokens, typeSystem);
704             } else {
705                 if (!(source instanceof Selector)) {
706                     String msg = GraphI18n.functionIsAmbiguous.text("CONTAINS()", pos.getLine(), pos.getColumn());
707                     throw new ParsingException(pos, msg);
708                 }
709                 selectorName = ((Selector)source).getName();
710                 propertyName = first;
711             }
712             tokens.consume(',');
713 
714             // Followed by the full text search expression ...
715             String expression = removeBracketsAndQuotes(tokens.consume());
716             Term term = parseFullTextSearchExpression(expression, tokens.previousPosition());
717             tokens.consume(")");
718             constraint = new FullTextSearch(selectorName, propertyName, expression, term);
719         } else if (tokens.canConsume("ISSAMENODE", "(")) {
720             SelectorName selectorName = null;
721             if (tokens.matches(ANY_VALUE, ")")) {
722                 if (!(source instanceof Selector)) {
723                     String msg = GraphI18n.functionIsAmbiguous.text("ISSAMENODE()", pos.getLine(), pos.getColumn());
724                     throw new ParsingException(pos, msg);
725                 }
726                 selectorName = ((Selector)source).getName();
727             } else {
728                 selectorName = parseSelectorName(tokens);
729                 tokens.consume(',');
730             }
731             String path = parsePath(tokens, typeSystem);
732             tokens.consume(')');
733             constraint = new SameNode(selectorName, path);
734         } else if (tokens.canConsume("ISCHILDNODE", "(")) {
735             SelectorName selectorName = null;
736             if (tokens.matches(ANY_VALUE, ")")) {
737                 if (!(source instanceof Selector)) {
738                     String msg = GraphI18n.functionIsAmbiguous.text("ISCHILDNODE()", pos.getLine(), pos.getColumn());
739                     throw new ParsingException(pos, msg);
740                 }
741                 selectorName = ((Selector)source).getName();
742             } else {
743                 selectorName = parseSelectorName(tokens);
744                 tokens.consume(',');
745             }
746             String path = parsePath(tokens, typeSystem);
747             tokens.consume(')');
748             constraint = new ChildNode(selectorName, path);
749         } else if (tokens.canConsume("ISDESCENDANTNODE", "(")) {
750             SelectorName selectorName = null;
751             if (tokens.matches(ANY_VALUE, ")")) {
752                 if (!(source instanceof Selector)) {
753                     String msg = GraphI18n.functionIsAmbiguous.text("ISDESCENDANTNODE()", pos.getLine(), pos.getColumn());
754                     throw new ParsingException(pos, msg);
755                 }
756                 selectorName = ((Selector)source).getName();
757             } else {
758                 selectorName = parseSelectorName(tokens);
759                 tokens.consume(',');
760             }
761             String path = parsePath(tokens, typeSystem);
762             tokens.consume(')');
763             constraint = new DescendantNode(selectorName, path);
764         } else {
765             // First try a property existance ...
766             Position pos2 = tokens.nextPosition();
767             constraint = parsePropertyExistance(tokens, typeSystem, source);
768             if (constraint == null) {
769                 // Try to parse as a dynamic operand ...
770                 DynamicOperand left = parseDynamicOperand(tokens, typeSystem, source);
771                 if (left != null) {
772                     if (tokens.matches('(') && left instanceof PropertyValue) {
773                         // This was probably a bad function that we parsed as the start of a dynamic operation ...
774                         String name = ((PropertyValue)left).getPropertyName(); // this may be the function name
775                         String msg = GraphI18n.expectingConstraintCondition.text(name, pos2.getLine(), pos2.getColumn());
776                         throw new ParsingException(pos, msg);
777                     }
778                     if (tokens.matches("IN", "(") || tokens.matches("NOT", "IN", "(")) {
779                         boolean not = tokens.canConsume("NOT");
780                         Collection<StaticOperand> staticOperands = parseInClause(tokens, typeSystem);
781                         constraint = new SetCriteria(left, staticOperands);
782                         if (not) constraint = new Not(constraint);
783                     } else if (tokens.matches("BETWEEN") || tokens.matches("NOT", "BETWEEN")) {
784                         boolean not = tokens.canConsume("NOT");
785                         tokens.consume("BETWEEN");
786                         StaticOperand lowerBound = parseStaticOperand(tokens, typeSystem);
787                         boolean lowerInclusive = !tokens.canConsume("EXCLUSIVE");
788                         tokens.consume("AND");
789                         StaticOperand upperBound = parseStaticOperand(tokens, typeSystem);
790                         boolean upperInclusive = !tokens.canConsume("EXCLUSIVE");
791                         constraint = new Between(left, lowerBound, upperBound, lowerInclusive, upperInclusive);
792                         if (not) constraint = new Not(constraint);
793                     } else {
794                         Operator operator = parseComparisonOperator(tokens);
795                         StaticOperand right = parseStaticOperand(tokens, typeSystem);
796                         constraint = new Comparison(left, operator, right);
797                     }
798                 }
799                 // else continue ...
800             }
801         }
802         if (constraint == null) {
803             String msg = GraphI18n.expectingConstraintCondition.text(tokens.consume(), pos.getLine(), pos.getColumn());
804             throw new ParsingException(pos, msg);
805         }
806         // AND has higher precedence than OR, so we need to evaluate it first ...
807         while (tokens.canConsume("AND")) {
808             constraint = new And(constraint, parseConstraint(tokens, typeSystem, source));
809         }
810         while (tokens.canConsume("OR")) {
811             constraint = new Or(constraint, parseConstraint(tokens, typeSystem, source));
812         }
813         return constraint;
814     }
815 
816     protected List<StaticOperand> parseInClause( TokenStream tokens,
817                                                  TypeSystem typeSystem ) {
818         List<StaticOperand> result = new ArrayList<StaticOperand>();
819         tokens.consume("IN");
820         tokens.consume("(");
821         if (!tokens.canConsume(")")) {
822             // Not empty, so read the static operands ...
823             do {
824                 result.add(parseStaticOperand(tokens, typeSystem));
825             } while (tokens.canConsume(','));
826             tokens.consume(")");
827         }
828         return result;
829     }
830 
831     protected Term parseFullTextSearchExpression( String expression,
832                                                   Position startOfExpression ) {
833         try {
834             return new FullTextSearchParser().parse(expression);
835         } catch (ParsingException e) {
836             // Convert the position in the exception into a position in the query.
837             Position queryPos = startOfExpression.add(e.getPosition());
838             throw new ParsingException(queryPos, e.getMessage());
839         }
840     }
841 
842     protected Operator parseComparisonOperator( TokenStream tokens ) {
843         if (tokens.canConsume("=")) return Operator.EQUAL_TO;
844         if (tokens.canConsume("LIKE")) return Operator.LIKE;
845         if (tokens.canConsume("!", "=")) return Operator.NOT_EQUAL_TO;
846         if (tokens.canConsume("<", ">")) return Operator.NOT_EQUAL_TO;
847         if (tokens.canConsume("<", "=")) return Operator.LESS_THAN_OR_EQUAL_TO;
848         if (tokens.canConsume(">", "=")) return Operator.GREATER_THAN_OR_EQUAL_TO;
849         if (tokens.canConsume("<")) return Operator.LESS_THAN;
850         if (tokens.canConsume(">")) return Operator.GREATER_THAN;
851         Position pos = tokens.nextPosition();
852         String msg = GraphI18n.expectingComparisonOperator.text(tokens.consume(), pos.getLine(), pos.getColumn());
853         throw new ParsingException(pos, msg);
854     }
855 
856     protected List<Ordering> parseOrderBy( TokenStream tokens,
857                                            TypeSystem typeSystem,
858                                            Source source ) {
859         if (tokens.canConsume("ORDER", "BY")) {
860             List<Ordering> orderings = new ArrayList<Ordering>();
861             do {
862                 orderings.add(parseOrdering(tokens, typeSystem, source));
863             } while (tokens.canConsume(','));
864             return orderings;
865         }
866         return null;
867     }
868 
869     protected Ordering parseOrdering( TokenStream tokens,
870                                       TypeSystem typeSystem,
871                                       Source source ) {
872         DynamicOperand operand = parseDynamicOperand(tokens, typeSystem, source);
873         Order order = Order.ASCENDING;
874         if (tokens.canConsume("DESC")) order = Order.DESCENDING;
875         if (tokens.canConsume("ASC")) order = Order.ASCENDING;
876         return new Ordering(operand, order);
877     }
878 
879     protected Constraint parsePropertyExistance( TokenStream tokens,
880                                                  TypeSystem typeSystem,
881                                                  Source source ) {
882         if (tokens.matches(ANY_VALUE, ".", ANY_VALUE, "IS", "NOT", "NULL")
883             || tokens.matches(ANY_VALUE, ".", ANY_VALUE, "IS", "NULL") || tokens.matches(ANY_VALUE, "IS", "NOT", "NULL")
884             || tokens.matches(ANY_VALUE, "IS", "NULL")) {
885             Position pos = tokens.nextPosition();
886             String firstWord = tokens.consume();
887             SelectorName selectorName = null;
888             String propertyName = null;
889             if (tokens.canConsume('.')) {
890                 // We actually read the selector name, so now read the property name ...
891                 selectorName = new SelectorName(firstWord);
892                 propertyName = parseName(tokens, typeSystem);
893             } else {
894                 // Otherwise the source should be a single named selector
895                 if (!(source instanceof Selector)) {
896                     String msg = GraphI18n.mustBeScopedAtLineAndColumn.text(firstWord, pos.getLine(), pos.getColumn());
897                     throw new ParsingException(pos, msg);
898                 }
899                 selectorName = ((Selector)source).getName();
900                 propertyName = firstWord;
901             }
902             if (tokens.canConsume("IS", "NOT", "NULL")) {
903                 return new PropertyExistence(selectorName, propertyName);
904             }
905             tokens.consume("IS", "NULL");
906             return new Not(new PropertyExistence(selectorName, propertyName));
907         }
908         return null;
909     }
910 
911     protected StaticOperand parseStaticOperand( TokenStream tokens,
912                                                 TypeSystem typeSystem ) {
913         if (tokens.canConsume('$')) {
914             // The variable name must conform to a valid prefix, which is defined as a valid NCName ...
915             String value = tokens.consume();
916             if (!XmlCharacters.isValidNcName(value)) {
917                 Position pos = tokens.previousPosition();
918                 String msg = GraphI18n.bindVariableMustConformToNcName.text(value, pos.getLine(), pos.getColumn());
919                 throw new ParsingException(pos, msg);
920             }
921             return new BindVariableName(value);
922         }
923         return parseLiteral(tokens, typeSystem);
924     }
925 
926     protected Literal parseLiteral( TokenStream tokens,
927                                     TypeSystem typeSystem ) {
928         if (tokens.canConsume("CAST", "(")) {
929             // Get the value that is to be cast ...
930             Position pos = tokens.nextPosition();
931             String value = parseLiteralValue(tokens, typeSystem);
932             // Figure out the type we're supposed to cast to ...
933             tokens.consume("AS");
934             String typeName = tokens.consume();
935             TypeFactory<?> typeFactory = typeSystem.getTypeFactory(typeName);
936             if (typeFactory == null) {
937                 Position typePos = tokens.previousPosition();
938                 String msg = GraphI18n.invalidPropertyType.text(tokens.consume(), typePos.getLine(), typePos.getColumn());
939                 throw new ParsingException(typePos, msg);
940             }
941             // Convert the supplied value to the desired value ...
942             tokens.consume(')');
943             try {
944                 Object literal = typeFactory.create(value);
945                 return new Literal(literal);
946             } catch (ValueFormatException e) {
947                 String msg = GraphI18n.valueCannotBeCastToSpecifiedType.text(value,
948                                                                              pos.getLine(),
949                                                                              pos.getColumn(),
950                                                                              typeFactory.getTypeName(),
951                                                                              e.getMessage());
952                 throw new ParsingException(pos, msg);
953             }
954         }
955         // Just create a literal out of the supplied value ...
956         return new Literal(parseLiteralValue(tokens, typeSystem));
957     }
958 
    /**
     * Parse the string form of a literal value. The alternatives are tried in this order: a quoted string; the keywords
     * <code>TRUE</code> and <code>FALSE</code> (only if the type system has a boolean factory); a decimal number (only if the
     * type system has a double factory and the first token is followed by a '.'); a date (only if the type system has a
     * date-time factory and the first token is followed by a '-'); and finally a long.
     * 
     * @param tokens the token stream, positioned at the start of the literal value
     * @param typeSystem the type system that supplies the boolean, double, date-time and long factories
     * @return the quoted string without its quotes, or the string that the matching factory produced for the value
     * @throws ParsingException if the value cannot be converted by the factory that was selected for it
     */
    protected String parseLiteralValue( TokenStream tokens,
                                        TypeSystem typeSystem ) {
        if (tokens.matches(SqlTokenizer.QUOTED_STRING)) {
            return removeBracketsAndQuotes(tokens.consume());
        }
        TypeFactory<Boolean> booleanFactory = typeSystem.getBooleanFactory();
        if (booleanFactory != null) {
            if (tokens.canConsume("TRUE")) return booleanFactory.asString(Boolean.TRUE);
            if (tokens.canConsume("FALSE")) return booleanFactory.asString(Boolean.FALSE);
        }

        // Otherwise it is an unquoted literal value ...
        Position pos = tokens.nextPosition();
        // A leading '+' is consumed but not recorded; only a leading '-' is kept as the sign ...
        String sign = "";
        if (tokens.canConsume('-')) sign = "-";
        else if (tokens.canConsume('+')) sign = "";

        // Try to parse this value as a number ...
        String integral = tokens.consume();
        TypeFactory<Double> doubleFactory = typeSystem.getDoubleFactory();
        if (doubleFactory != null) {
            String decimal = null;
            if (tokens.canConsume('.')) {
                decimal = tokens.consume();
                String value = sign + integral + "." + decimal;
                // NOTE(review): only a lower-case 'e' is recognized as the exponent marker here; whether an upper-case 'E'
                // can reach this point depends on the tokenizer - confirm.
                if (decimal.endsWith("e") && (tokens.matches('+') || tokens.matches('-'))) {
                    // There's more to the number ...
                    value = value + tokens.consume() + tokens.consume(); // +/-EXP
                }
                try {
                    // Convert to a double and then back to a string to get canonical form ...
                    return doubleFactory.asString(doubleFactory.create(value));
                } catch (ValueFormatException e) {
                    String msg = GraphI18n.expectingLiteralAndUnableToParseAsDouble.text(value, pos.getLine(), pos.getColumn());
                    throw new ParsingException(pos, msg);
                }
            }
        }
        TypeFactory<?> dateTimeFactory = typeSystem.getDateTimeFactory();
        if (dateTimeFactory != null) {
            if (tokens.canConsume('-')) {
                // Looks like a date (see Section 3.6.4.3 of the JCR 2.0 specification) ...
                // sYYYY-MM-DDThh:mm:ss.sssTZD
                // Every separator up to and including the '.' before the fractional seconds is required here ...
                String year = integral;
                String month = tokens.consume();
                tokens.consume('-');
                String dateAndHour = tokens.consume();
                tokens.consume(':');
                String minutes = tokens.consume();
                tokens.consume(':');
                String seconds = tokens.consume();
                tokens.consume('.');
                String subSeconds = tokens.consume(); // should contain 'T' separator and possibly the TZ name and (if no +/-)
                // hours
                // Default to a time zone designator of "+00:00" unless the tokens that follow say otherwise ...
                String tzSign = "+";
                String tzHours = "00";
                String tzMinutes = "00";
                String tzDelim = ":";
                if (tokens.canConsume('+')) {
                    // the fractionalSeconds did NOT contain the tzHours ...
                    tzHours = tokens.consume();
                    if (tokens.canConsume(':')) tzMinutes = tokens.consume();
                } else if (tokens.canConsume('-')) {
                    // the fractionalSeconds did NOT contain the tzHours ...
                    tzSign = "-";
                    tzHours = tokens.consume();
                    if (tokens.canConsume(':')) tzMinutes = tokens.consume();
                } else if (tokens.canConsume(':')) {
                    // fractionalSeconds DID contain the TZ hours (without + or -)
                    tzHours = tzSign = "";
                    // NOTE(review): a ':' has already been consumed by the condition above, yet a second ':' is required
                    // before the minutes are read; otherwise the minutes stay "00" and the next token is left in the stream.
                    // Confirm that this is intended.
                    if (tokens.canConsume(':')) tzMinutes = tokens.consume();
                } else if (subSeconds.endsWith("Z")) {
                    // The 'Z' is already part of subSeconds, so nothing more is appended for the time zone ...
                    tzSign = tzMinutes = tzDelim = tzHours = "";
                } else if (subSeconds.endsWith("UTC")) {
                    // Drop the trailing "UTC" (unless that is all there is) and rely on the default "+00:00" ...
                    subSeconds = subSeconds.length() > 3 ? subSeconds.substring(0, subSeconds.length() - 3) : subSeconds;
                }
                String value = sign + year + "-" + month + "-" + dateAndHour + ":" + minutes + ":" + seconds + "." + subSeconds
                               + tzSign + tzHours + tzDelim + tzMinutes;
                try {
                    // Convert to a date and then back to a string to get canonical form ...
                    Object dateTime = dateTimeFactory.create(value);
                    return dateTimeFactory.asString(dateTime);
                } catch (ValueFormatException e) {
                    String msg = GraphI18n.expectingLiteralAndUnableToParseAsDate.text(value, pos.getLine(), pos.getColumn());
                    throw new ParsingException(pos, msg);
                }
            }
        }
        // NOTE(review): unlike the other factories, the long factory is used without a null check - confirm that the type
        // system always supplies one.
        TypeFactory<Long> longFactory = typeSystem.getLongFactory();
        // try to parse as a long ...
        String value = sign + integral;
        try {
            // Convert to a long and then back to a string to get canonical form ...
            return longFactory.asString(longFactory.create(value));
        } catch (ValueFormatException e) {
            String msg = GraphI18n.expectingLiteralAndUnableToParseAsLong.text(value, pos.getLine(), pos.getColumn());
            throw new ParsingException(pos, msg);
        }
    }
1058 
    /**
     * Parse a dynamic operand: a parenthesized dynamic operand, one of the functions <code>LENGTH</code>, <code>LOWER</code>,
     * <code>UPPER</code>, <code>NAME</code>, <code>LOCALNAME</code>, <code>SCORE</code>, <code>DEPTH</code>,
     * <code>PATH</code> or <code>REFERENCE</code>, or otherwise a property value. If the operand is followed by one of the
     * arithmetic operators <code>+</code>, <code>-</code>, <code>*</code> or <code>/</code>, the operand that follows is
     * parsed as well and the two are combined into an {@link ArithmeticOperand}.
     * <p>
     * Note that the argument-less forms <code>NAME()</code>, <code>LOCALNAME()</code>, <code>SCORE()</code>,
     * <code>DEPTH()</code> and <code>PATH()</code> return immediately, so no arithmetic operator is looked for after them.
     * </p>
     * 
     * @param tokens the token stream, positioned at the start of the operand
     * @param typeSystem the type system handed to the property and reference value parsers
     * @param source the source of the query; must be a single {@link Selector} for the argument-less function forms
     * @return the dynamic operand
     * @throws ParsingException if an argument-less function is used and the source is not a single selector
     */
    protected DynamicOperand parseDynamicOperand( TokenStream tokens,
                                                  TypeSystem typeSystem,
                                                  Source source ) {
        DynamicOperand result = null;
        Position pos = tokens.nextPosition();
        if (tokens.canConsume('(')) {
            result = parseDynamicOperand(tokens, typeSystem, source);
            tokens.consume(")");
        } else if (tokens.canConsume("LENGTH", "(")) {
            result = new Length(parsePropertyValue(tokens, typeSystem, source));
            tokens.consume(")");
        } else if (tokens.canConsume("LOWER", "(")) {
            result = new LowerCase(parseDynamicOperand(tokens, typeSystem, source));
            tokens.consume(")");
        } else if (tokens.canConsume("UPPER", "(")) {
            result = new UpperCase(parseDynamicOperand(tokens, typeSystem, source));
            tokens.consume(")");
        } else if (tokens.canConsume("NAME", "(")) {
            if (tokens.canConsume(")")) {
                // No selector name was given, so it has to come from a single-selector source ...
                if (source instanceof Selector) {
                    return new NodeName(((Selector)source).getName());
                }
                String msg = GraphI18n.functionIsAmbiguous.text("NAME()", pos.getLine(), pos.getColumn());
                throw new ParsingException(pos, msg);
            }
            result = new NodeName(parseSelectorName(tokens));
            tokens.consume(")");
        } else if (tokens.canConsume("LOCALNAME", "(")) {
            if (tokens.canConsume(")")) {
                // No selector name was given, so it has to come from a single-selector source ...
                if (source instanceof Selector) {
                    return new NodeLocalName(((Selector)source).getName());
                }
                String msg = GraphI18n.functionIsAmbiguous.text("LOCALNAME()", pos.getLine(), pos.getColumn());
                throw new ParsingException(pos, msg);
            }
            result = new NodeLocalName(parseSelectorName(tokens));
            tokens.consume(")");
        } else if (tokens.canConsume("SCORE", "(")) {
            if (tokens.canConsume(")")) {
                // No selector name was given, so it has to come from a single-selector source ...
                if (source instanceof Selector) {
                    return new FullTextSearchScore(((Selector)source).getName());
                }
                String msg = GraphI18n.functionIsAmbiguous.text("SCORE()", pos.getLine(), pos.getColumn());
                throw new ParsingException(pos, msg);
            }
            result = new FullTextSearchScore(parseSelectorName(tokens));
            tokens.consume(")");
        } else if (tokens.canConsume("DEPTH", "(")) {
            if (tokens.canConsume(")")) {
                // No selector name was given, so it has to come from a single-selector source ...
                if (source instanceof Selector) {
                    return new NodeDepth(((Selector)source).getName());
                }
                String msg = GraphI18n.functionIsAmbiguous.text("DEPTH()", pos.getLine(), pos.getColumn());
                throw new ParsingException(pos, msg);
            }
            result = new NodeDepth(parseSelectorName(tokens));
            tokens.consume(")");
        } else if (tokens.canConsume("PATH", "(")) {
            if (tokens.canConsume(")")) {
                // No selector name was given, so it has to come from a single-selector source ...
                if (source instanceof Selector) {
                    return new NodePath(((Selector)source).getName());
                }
                String msg = GraphI18n.functionIsAmbiguous.text("PATH()", pos.getLine(), pos.getColumn());
                throw new ParsingException(pos, msg);
            }
            result = new NodePath(parseSelectorName(tokens));
            tokens.consume(")");
        } else if (tokens.canConsume("REFERENCE", "(")) {
            // The closing parenthesis is left to parseReferenceValue(...); it is not consumed here ...
            result = parseReferenceValue(tokens, typeSystem, source);
        } else {
            result = parsePropertyValue(tokens, typeSystem, source);
        }

        // Is this operand followed by an arithmetic operation ...
        ArithmeticOperator arithmeticOperator = null;
        if (tokens.canConsume('+')) {
            arithmeticOperator = ArithmeticOperator.ADD;
        } else if (tokens.canConsume('-')) {
            arithmeticOperator = ArithmeticOperator.SUBTRACT;
        } else if (tokens.canConsume('*')) {
            arithmeticOperator = ArithmeticOperator.MULTIPLY;
        } else if (tokens.canConsume('/')) {
            arithmeticOperator = ArithmeticOperator.DIVIDE;
        }
        if (arithmeticOperator != null) {
            if (tokens.matches('(')) {
                // Don't use precedence, but instead use the next DynamicOperand as the RHS ...
                DynamicOperand right = parseDynamicOperand(tokens, typeSystem, source);
                result = new ArithmeticOperand(result, arithmeticOperator, right);
            } else {
                // There is no parenthesis, so use operator precedence ...
                DynamicOperand right = parseDynamicOperand(tokens, typeSystem, source);
                if (right instanceof ArithmeticOperand) {
                    // But the RHS is an arithmetic operand, so we need to use operator precedence ...
                    ArithmeticOperand arithRhs = (ArithmeticOperand)right;
                    ArithmeticOperator rhsOperator = arithRhs.getOperator();
                    if (arithmeticOperator.precedes(rhsOperator)) {
                        // This operand's operator does take precedence, so this must be computed before working with the RHS ...
                        // i.e. 'result OP (left RHSOP right)' is regrouped as '(result OP left) RHSOP right'
                        DynamicOperand newRhs = arithRhs.getRight();
                        DynamicOperand newLhs = new ArithmeticOperand(result, arithmeticOperator, arithRhs.getLeft());
                        result = new ArithmeticOperand(newLhs, rhsOperator, newRhs);
                    } else {
                        result = new ArithmeticOperand(result, arithmeticOperator, right);
                    }
                } else {
                    // The RHS is just another DynamicOperand ...
                    result = new ArithmeticOperand(result, arithmeticOperator, right);
                }
            }
        }
        return result;
    }
1171 
1172     protected PropertyValue parsePropertyValue( TokenStream tokens,
1173                                                 TypeSystem typeSystem,
1174                                                 Source source ) {
1175         Position pos = tokens.nextPosition();
1176         String firstWord = removeBracketsAndQuotes(tokens.consume());
1177         SelectorName selectorName = null;
1178         if (tokens.canConsume('.')) {
1179             // We actually read the selector name, so now read the property name ...
1180             selectorName = new SelectorName(firstWord);
1181             String propertyName = parseName(tokens, typeSystem);
1182             return new PropertyValue(selectorName, propertyName);
1183         }
1184         // Otherwise the source should be a single named selector
1185         if (source instanceof Selector) {
1186             selectorName = ((Selector)source).getAliasOrName();
1187             return new PropertyValue(selectorName, firstWord);
1188         }
1189         String msg = GraphI18n.mustBeScopedAtLineAndColumn.text(firstWord, pos.getLine(), pos.getColumn());
1190         throw new ParsingException(pos, msg);
1191     }
1192 
1193     protected ReferenceValue parseReferenceValue( TokenStream tokens,
1194                                                   TypeSystem typeSystem,
1195                                                   Source source ) {
1196         Position pos = tokens.nextPosition();
1197         SelectorName selectorName = null;
1198         if (tokens.canConsume(')')) {
1199             // There should be a single source ...
1200             if (source instanceof Selector) {
1201                 selectorName = ((Selector)source).getAliasOrName();
1202                 return new ReferenceValue(selectorName);
1203             }
1204             String msg = GraphI18n.functionIsAmbiguous.text("REFERENCE()", pos.getLine(), pos.getColumn());
1205             throw new ParsingException(pos, msg);
1206         }
1207         // Otherwise, there is at least one word inside the parentheses ...
1208         String firstWord = removeBracketsAndQuotes(tokens.consume());
1209         if (tokens.canConsume('.')) {
1210             // We actually read the selector name, so now read the property name ...
1211             selectorName = new SelectorName(firstWord);
1212             String propertyName = parseName(tokens, typeSystem);
1213             return new ReferenceValue(selectorName, propertyName);
1214         }
1215         tokens.consume(")");
1216         // The name may be a selector name, or it may be a property name on the default selector.
1217         // If there is just a single selector ...
1218         if (source instanceof Selector) {
1219             Selector selector = (Selector)source;
1220             // and the selector name matches ...
1221             selectorName = new SelectorName(firstWord);
1222             if (selectorName.equals(selector.getName()) || (selector.hasAlias() && selectorName.equals(selector.getAlias()))) {
1223                 // This is a reference value with just the selector name ...
1224                 return new ReferenceValue(selectorName);
1225             }
1226             // Otherwise, the reference value is just the property name ...
1227             return new ReferenceValue(selector.getAliasOrName(), firstWord);
1228         }
1229         // Otherwise, the first word is the name of a selector ...
1230         selectorName = new SelectorName(firstWord);
1231         return new ReferenceValue(selectorName);
1232     }
1233 
1234     protected Limit parseLimit( TokenStream tokens ) {
1235         if (tokens.canConsume("LIMIT")) {
1236             int first = tokens.consumeInteger();
1237             if (tokens.canConsume(',')) {
1238                 // This is of the 'from,to' style ...
1239                 int to = tokens.consumeInteger();
1240                 int offset = to - first;
1241                 if (offset < 0) {
1242                     Position pos = tokens.previousPosition();
1243                     String msg = GraphI18n.secondValueInLimitRangeCannotBeLessThanFirst.text(first,
1244                                                                                              to,
1245                                                                                              pos.getLine(),
1246                                                                                              pos.getColumn());
1247                     throw new ParsingException(pos, msg);
1248                 }
1249                 return new Limit(offset, first);
1250             }
1251             if (tokens.canConsume("OFFSET")) {
1252                 int offset = tokens.consumeInteger();
1253                 return new Limit(first, offset);
1254             }
1255             // No offset
1256             return new Limit(first, 0);
1257         }
1258         return null;
1259     }
1260 
1261     /**
1262      * Remove any leading and trailing single-quotes, double-quotes, or square brackets from the supplied text.
1263      * 
1264      * @param text the input text; may not be null
1265      * @return the text without leading and trailing brackets and quotes, or <code>text</code> if there were no square brackets or
1266      *         quotes
1267      */
1268     protected String removeBracketsAndQuotes( String text ) {
1269         if (text.length() > 0) {
1270             char firstChar = text.charAt(0);
1271             switch (firstChar) {
1272                 case '\'':
1273                 case '"':
1274                     assert text.charAt(text.length() - 1) == firstChar;
1275                     return removeBracketsAndQuotes(text.substring(1, text.length() - 1));
1276                 case '[':
1277                     assert text.charAt(text.length() - 1) == ']';
1278                     return removeBracketsAndQuotes(text.substring(1, text.length() - 1));
1279             }
1280         }
1281         return text;
1282     }
1283 
1284     protected NamedSelector parseNamedSelector( TokenStream tokens ) {
1285         SelectorName name = parseSelectorName(tokens);
1286         SelectorName alias = null;
1287         if (tokens.canConsume("AS")) alias = parseSelectorName(tokens);
1288         return new NamedSelector(name, alias);
1289     }
1290 
1291     protected SelectorName parseSelectorName( TokenStream tokens ) {
1292         return new SelectorName(removeBracketsAndQuotes(tokens.consume()));
1293     }
1294 
1295     protected String parsePath( TokenStream tokens,
1296                                 TypeSystem typeSystem ) {
1297         return removeBracketsAndQuotes(tokens.consume());
1298     }
1299 
1300     protected String parseName( TokenStream tokens,
1301                                 TypeSystem typeSystem ) {
1302         return removeBracketsAndQuotes(tokens.consume());
1303     }
1304 
1305     /**
1306      * A {@link TokenStream.Tokenizer} implementation that parses words, quoted phrases, comments, and symbols. Words are
1307      * delimited by whitespace and consist only of alpha-number characters plus the underscore character. Quoted phrases are
1308      * delimited by single-quote and double-quote characters (which may be escaped within the quote). Comments are the characters
1309      * starting with '/*' and ending with '&#42;/', or starting with '--' and ending with the next line terminator (or the end of
1310      * the content).
1311      */
1312     public static class SqlTokenizer implements TokenStream.Tokenizer {
1313         /**
1314          * The token type for tokens that represent an unquoted string containing a character sequence made up of non-whitespace
1315          * and non-symbol characters.
1316          */
1317         public static final int WORD = 1;
1318         /**
1319          * The token type for tokens that consist of an individual "symbol" character. The set of characters includes:
1320          * <code>[]<>=-+(),</code>
1321          */
1322         public static final int SYMBOL = 2;
1323         /**
1324          * The token type for tokens that consist of other characters.
1325          */
1326         public static final int OTHER = 3;
1327         /**
1328          * The token type for tokens that consist of all the characters within single-quotes, double-quotes, or square brackets.
1329          */
1330         public static final int QUOTED_STRING = 4;
1331         /**
1332          * The token type for tokens that consist of all the characters between "/*" and "&#42;/" or between "--" and the next
1333          * line terminator (e.g., '\n', '\r' or "\r\n")
1334          */
1335         public static final int COMMENT = 6;
1336 
1337         private final boolean useComments;
1338 
1339         public SqlTokenizer( boolean useComments ) {
1340             this.useComments = useComments;
1341         }
1342 
1343         /**
1344          * {@inheritDoc}
1345          * 
1346          * @see org.modeshape.common.text.TokenStream.Tokenizer#tokenize(CharacterStream, Tokens)
1347          */
1348         public void tokenize( CharacterStream input,
1349                               Tokens tokens ) throws ParsingException {
1350             while (input.hasNext()) {
1351                 char c = input.next();
1352                 switch (c) {
1353                     case ' ':
1354                     case '\t':
1355                     case '\n':
1356                     case '\r':
1357                         // Just skip these whitespace characters ...
1358                         break;
1359                     case '(':
1360                     case ')':
1361                     case '{':
1362                     case '}':
1363                     case '*':
1364                     case '.':
1365                     case ',':
1366                     case ';':
1367                     case '+':
1368                     case '%':
1369                     case '?':
1370                     case '$':
1371                     case ']':
1372                     case '!':
1373                     case '<':
1374                     case '>':
1375                     case '|':
1376                     case '=':
1377                     case ':':
1378                         tokens.addToken(input.position(input.index()), input.index(), input.index() + 1, SYMBOL);
1379                         break;
1380                     case '\'':
1381                     case '[':
1382                     case '\"':
1383                         int startIndex = input.index();
1384                         char closingChar = c == '[' ? ']' : c;
1385                         Position pos = input.position(startIndex);
1386                         boolean foundClosingQuote = false;
1387                         while (input.hasNext()) {
1388                             c = input.next();
1389                             if (c == '\\' && input.isNext(closingChar)) {
1390                                 c = input.next(); // consume the closingChar since it is escaped
1391                             } else if (c == closingChar) {
1392                                 foundClosingQuote = true;
1393                                 break;
1394                             }
1395                         }
1396                         if (!foundClosingQuote) {
1397                             String msg = CommonI18n.noMatchingDoubleQuoteFound.text(pos.getLine(), pos.getColumn());
1398                             if (closingChar == '\'') {
1399                                 msg = CommonI18n.noMatchingSingleQuoteFound.text(pos.getLine(), pos.getColumn());
1400                             } else if (closingChar == ']') {
1401                                 msg = GraphI18n.noMatchingBracketFound.text(pos.getLine(), pos.getColumn());
1402                             }
1403                             throw new ParsingException(pos, msg);
1404                         }
1405                         int endIndex = input.index() + 1; // beyond last character read
1406                         tokens.addToken(pos, startIndex, endIndex, QUOTED_STRING);
1407                         break;
1408                     case '-':
1409                         startIndex = input.index();
1410                         pos = input.position(input.index());
1411                         if (input.isNext('-')) {
1412                             // End-of-line comment ...
1413                             boolean foundLineTerminator = false;
1414                             while (input.hasNext()) {
1415                                 c = input.next();
1416                                 if (c == '\n' || c == '\r') {
1417                                     foundLineTerminator = true;
1418                                     break;
1419                                 }
1420                             }
1421                             endIndex = input.index(); // the token won't include the '\n' or '\r' character(s)
1422                             if (!foundLineTerminator) ++endIndex; // must point beyond last char
1423                             if (c == '\r' && input.isNext('\n')) input.next();
1424                             if (useComments) {
1425                                 tokens.addToken(pos, startIndex, endIndex, COMMENT);
1426                             }
1427                         } else {
1428                             tokens.addToken(input.position(input.index()), input.index(), input.index() + 1, SYMBOL);
1429                             break;
1430                         }
1431                         break;
1432                     case '/':
1433                         startIndex = input.index();
1434                         pos = input.position(input.index());
1435                         if (input.isNext('*')) {
1436                             // Multi-line comment ...
1437                             while (input.hasNext() && !input.isNext('*', '/')) {
1438                                 c = input.next();
1439                             }
1440                             if (input.hasNext()) input.next(); // consume the '*'
1441                             if (input.hasNext()) input.next(); // consume the '/'
1442                             if (useComments) {
1443                                 endIndex = input.index() + 1; // the token will include the quote characters
1444                                 tokens.addToken(pos, startIndex, endIndex, COMMENT);
1445                             }
1446                         } else {
1447                             tokens.addToken(input.position(input.index()), input.index(), input.index() + 1, SYMBOL);
1448                             break;
1449                         }
1450                         break;
1451                     default:
1452                         startIndex = input.index();
1453                         pos = input.position(input.index());
1454                         // Read as long as there is a valid XML character ...
1455                         int tokenType = (Character.isLetterOrDigit(c) || c == '_') ? WORD : OTHER;
1456                         while (input.isNextLetterOrDigit() || input.isNext('_')) {
1457                             c = input.next();
1458                         }
1459                         endIndex = input.index() + 1; // beyond last character that was included
1460                         tokens.addToken(pos, startIndex, endIndex, tokenType);
1461                 }
1462             }
1463         }
1464     }
1465 }