1 /*
2 * ModeShape (http://www.modeshape.org)
3 * See the COPYRIGHT.txt file distributed with this work for information
4 * regarding copyright ownership. Some portions may be licensed
5 * to Red Hat, Inc. under one or more contributor license agreements.
6 * See the AUTHORS.txt file in the distribution for a full listing of
7 * individual contributors.
8 *
9 * ModeShape is free software. Unless otherwise indicated, all code in ModeShape
10 * is licensed to you under the terms of the GNU Lesser General Public License as
11 * published by the Free Software Foundation; either version 2.1 of
12 * the License, or (at your option) any later version.
13 *
14 * ModeShape is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this software; if not, write to the Free
21 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
22 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
23 */
24 package org.modeshape.graph.query.plan;
25
26 import java.util.ArrayList;
27 import java.util.HashMap;
28 import java.util.LinkedList;
29 import java.util.List;
30 import java.util.Map;
31 import org.modeshape.graph.GraphI18n;
32 import org.modeshape.graph.query.QueryContext;
33 import org.modeshape.graph.query.model.AllNodes;
34 import org.modeshape.graph.query.model.And;
35 import org.modeshape.graph.query.model.Column;
36 import org.modeshape.graph.query.model.Constraint;
37 import org.modeshape.graph.query.model.FullTextSearch;
38 import org.modeshape.graph.query.model.Join;
39 import org.modeshape.graph.query.model.JoinType;
40 import org.modeshape.graph.query.model.Limit;
41 import org.modeshape.graph.query.model.NamedSelector;
42 import org.modeshape.graph.query.model.Ordering;
43 import org.modeshape.graph.query.model.Query;
44 import org.modeshape.graph.query.model.QueryCommand;
45 import org.modeshape.graph.query.model.Selector;
46 import org.modeshape.graph.query.model.SelectorName;
47 import org.modeshape.graph.query.model.SetQuery;
48 import org.modeshape.graph.query.model.Source;
49 import org.modeshape.graph.query.model.Visitors;
50 import org.modeshape.graph.query.plan.PlanNode.Property;
51 import org.modeshape.graph.query.plan.PlanNode.Type;
52 import org.modeshape.graph.query.validate.Schemata;
53 import org.modeshape.graph.query.validate.Validator;
54 import org.modeshape.graph.query.validate.Schemata.Table;
55 import org.modeshape.graph.query.validate.Schemata.View;
56
57 /**
58 * The planner that produces a canonical query plan given a {@link QueryCommand query command}.
59 * <p>
60 * A canonical plan always has the same structure:
61 *
62 * <pre>
63 * LIMIT if row limit or offset are used
64 * |
65 * SORTING if 'ORDER BY' is used
66 * |
67 * DUP_REMOVE if 'SELECT DISTINCT' is used
68 * |
69 * PROJECT with the list of columns being SELECTed
70 * |
71 * GROUP if 'GROUP BY' is used
72 * |
73 * SELECT1
74 * | One or more SELECT plan nodes that each have
75 * SELECT2 a single non-join constraint that are then all AND-ed
76 * | together (see {@link #separateAndConstraints(Constraint, List)})
77 * SELECTn
78 * |
79 * SOURCE or JOIN A single SOURCE or JOIN node, depending upon the query
80 * / \
81 * / \
82 * SOJ SOJ A SOURCE or JOIN node for the left and right side of the JOIN
83 * </pre>
84 * <p>
 * The leaves of the tree are always SOURCE nodes, so <i>conceptually</i> data always flows through this plan from the bottom
86 * SOURCE nodes, is adjusted/filtered as it trickles up through the plan, and is then ready to be used by the caller as it emerges
87 * from the top node of the plan.
88 * </p>
89 * <p>
90 * This canonical plan, however, is later optimized and rearranged so that it performs faster.
91 * </p>
92 */
93 public class CanonicalPlanner implements Planner {
94
95 /**
96 * {@inheritDoc}
97 *
98 * @see org.modeshape.graph.query.plan.Planner#createPlan(org.modeshape.graph.query.QueryContext,
99 * org.modeshape.graph.query.model.QueryCommand)
100 */
101 public PlanNode createPlan( QueryContext context,
102 QueryCommand query ) {
103 PlanNode plan = null;
104 if (query instanceof Query) {
105 plan = createCanonicalPlan(context, (Query)query);
106 } else {
107 plan = createCanonicalPlan(context, (SetQuery)query);
108 }
109 return plan;
110 }
111
112 /**
113 * Create a canonical query plan for the given query.
114 *
115 * @param context the context in which the query is being planned
116 * @param query the query to be planned
117 * @return the root node of the plan tree representing the canonical plan
118 */
119 protected PlanNode createCanonicalPlan( QueryContext context,
120 Query query ) {
121 PlanNode plan = null;
122
123 // Process the source of the query ...
124 Map<SelectorName, Table> usedSources = new HashMap<SelectorName, Table>();
125 plan = createPlanNode(context, query.source(), usedSources);
126
127 // Attach criteria (on top) ...
128 plan = attachCriteria(context, plan, query.constraint());
129
130 // Attach groupbys (on top) ...
131 // plan = attachGrouping(context,plan,query.getGroupBy());
132
133 // Attach the project ...
134 plan = attachProject(context, plan, query.columns(), usedSources);
135
136 // Attach duplicate removal ...
137 if (query.isDistinct()) {
138 plan = attachDuplicateRemoval(context, plan);
139 }
140
141 // Process the orderings and limits ...
142 plan = attachSorting(context, plan, query.orderings());
143 plan = attachLimits(context, plan, query.limits());
144
145 // Validate that all the parts of the query are resolvable ...
146 validate(context, query, usedSources);
147
148 return plan;
149 }
150
151 /**
152 * Validate the supplied query.
153 *
154 * @param context the context in which the query is being planned
155 * @param query the set query to be planned
156 * @param usedSelectors the map of {@link SelectorName}s (aliases or names) used in the query.
157 */
158 protected void validate( QueryContext context,
159 QueryCommand query,
160 Map<SelectorName, Table> usedSelectors ) {
161 // Resolve everything ...
162 Visitors.visitAll(query, new Validator(context, usedSelectors));
163 }
164
165 /**
166 * Create a canonical query plan for the given set query.
167 *
168 * @param context the context in which the query is being planned
169 * @param query the set query to be planned
170 * @return the root node of the plan tree representing the canonical plan
171 */
172 protected PlanNode createCanonicalPlan( QueryContext context,
173 SetQuery query ) {
174 // Process the left and right parts of the query ...
175 PlanNode left = createPlan(context, query.left());
176 PlanNode right = createPlan(context, query.right());
177
178 // Wrap in a set operation node ...
179 PlanNode plan = new PlanNode(Type.SET_OPERATION);
180 plan.addChildren(left, right);
181 plan.setProperty(Property.SET_OPERATION, query.operation());
182 plan.setProperty(Property.SET_USE_ALL, query.isAll());
183
184 // Process the orderings and limits ...
185 plan = attachSorting(context, plan, query.orderings());
186 plan = attachLimits(context, plan, query.limits());
187 return plan;
188 }
189
190 /**
191 * Create a JOIN or SOURCE node that contain the source information.
192 *
193 * @param context the execution context
194 * @param source the source to be processed; may not be null
195 * @param usedSelectors the map of {@link SelectorName}s (aliases or names) used in the query.
196 * @return the new plan; never null
197 */
198 protected PlanNode createPlanNode( QueryContext context,
199 Source source,
200 Map<SelectorName, Table> usedSelectors ) {
201 if (source instanceof Selector) {
202 // No join required ...
203 assert source instanceof AllNodes || source instanceof NamedSelector;
204 Selector selector = (Selector)source;
205 PlanNode node = new PlanNode(Type.SOURCE);
206 if (selector.hasAlias()) {
207 node.addSelector(selector.alias());
208 node.setProperty(Property.SOURCE_ALIAS, selector.alias());
209 node.setProperty(Property.SOURCE_NAME, selector.name());
210 } else {
211 node.addSelector(selector.name());
212 node.setProperty(Property.SOURCE_NAME, selector.name());
213 }
214 // Validate the source name and set the available columns ...
215 Table table = context.getSchemata().getTable(selector.name());
216 if (table != null) {
217 if (table instanceof View) context.getHints().hasView = true;
218 if (usedSelectors.put(selector.aliasOrName(), table) != null) {
219 // There was already a table with this alias or name ...
220 }
221 node.setProperty(Property.SOURCE_COLUMNS, table.getColumns());
222 } else {
223 context.getProblems().addError(GraphI18n.tableDoesNotExist, selector.name());
224 }
225 return node;
226 }
227 if (source instanceof Join) {
228 Join join = (Join)source;
229 // Set up new join node corresponding to this join predicate
230 PlanNode node = new PlanNode(Type.JOIN);
231 node.setProperty(Property.JOIN_TYPE, join.type());
232 node.setProperty(Property.JOIN_ALGORITHM, JoinAlgorithm.NESTED_LOOP);
233 node.setProperty(Property.JOIN_CONDITION, join.joinCondition());
234
235 context.getHints().hasJoin = true;
236 if (join.type() == JoinType.LEFT_OUTER) {
237 context.getHints().hasOptionalJoin = true;
238 }
239
240 // Handle each child
241 Source[] clauses = new Source[] {join.left(), join.right()};
242 for (int i = 0; i < 2; i++) {
243 PlanNode sourceNode = createPlanNode(context, clauses[i], usedSelectors);
244 node.addLastChild(sourceNode);
245
246 // Add selectors to the joinNode
247 for (PlanNode child : node.getChildren()) {
248 node.addSelectors(child.getSelectors());
249 }
250 }
251 return node;
252 }
253 // should not get here; if we do, somebody added a new type of source
254 assert false;
255 return null;
256 }
257
258 /**
259 * Attach all criteria above the join nodes. The optimizer will push these criteria down to the appropriate source.
260 *
261 * @param context the context in which the query is being planned
262 * @param plan the existing plan, which joins all source groups
263 * @param constraint the criteria or constraint from the query
264 * @return the updated plan, or the existing plan if there were no constraints; never null
265 */
266 protected PlanNode attachCriteria( final QueryContext context,
267 PlanNode plan,
268 Constraint constraint ) {
269 if (constraint == null) return plan;
270 context.getHints().hasCriteria = true;
271
272 // Extract the list of Constraint objects that all must be satisfied ...
273 LinkedList<Constraint> andableConstraints = new LinkedList<Constraint>();
274 separateAndConstraints(constraint, andableConstraints);
275 assert !andableConstraints.isEmpty();
276
277 // For each of these constraints, create a criteria (SELECT) node above the supplied (JOIN or SOURCE) node.
278 // Do this in reverse order so that the top-most SELECT node corresponds to the first constraint.
279 while (!andableConstraints.isEmpty()) {
280 Constraint criteria = andableConstraints.removeLast();
281 // Create the select node ...
282 PlanNode criteriaNode = new PlanNode(Type.SELECT);
283 criteriaNode.setProperty(Property.SELECT_CRITERIA, criteria);
284
285 // Add selectors to the criteria node ...
286 criteriaNode.addSelectors(Visitors.getSelectorsReferencedBy(criteria));
287
288 // Is a full-text search of some kind included ...
289 Visitors.visitAll(criteria, new Visitors.AbstractVisitor() {
290 @Override
291 public void visit( FullTextSearch obj ) {
292 context.getHints().hasFullTextSearch = true;
293 }
294 });
295
296 criteriaNode.addFirstChild(plan);
297 plan = criteriaNode;
298 }
299 return plan;
300 }
301
302 /**
303 * Walk the supplied constraint to extract a list of the constraints that can be AND-ed together. For example, given the
304 * constraint tree ((C1 AND C2) AND (C3 OR C4)), this method would result in a list of three separate criteria: [C1,C2,(C3 OR
305 * C4)]. The resulting <code>andConstraints</code> list will contain Constraint objects that all must be true.
306 *
307 * @param constraint the input constraint
308 * @param andableConstraints the collection into which all non-{@link And AND} constraints should be placed
309 */
310 protected void separateAndConstraints( Constraint constraint,
311 List<Constraint> andableConstraints ) {
312 if (constraint == null) return;
313 assert andableConstraints != null;
314 if (constraint instanceof And) {
315 And and = (And)constraint;
316 separateAndConstraints(and.left(), andableConstraints);
317 separateAndConstraints(and.right(), andableConstraints);
318 } else {
319 andableConstraints.add(constraint);
320 }
321 }
322
323 /**
324 * Attach SORT node at top of tree. The SORT may be pushed down to a source (or sources) if possible by the optimizer.
325 *
326 * @param context the context in which the query is being planned
327 * @param plan the existing plan
328 * @param orderings list of orderings from the query
329 * @return the updated plan, or the existing plan if there were no orderings; never null
330 */
331 protected PlanNode attachSorting( QueryContext context,
332 PlanNode plan,
333 List<? extends Ordering> orderings ) {
334 if (orderings.isEmpty()) return plan;
335 PlanNode sortNode = new PlanNode(Type.SORT);
336
337 context.getHints().hasSort = true;
338 sortNode.setProperty(Property.SORT_ORDER_BY, orderings);
339 for (Ordering ordering : orderings) {
340 sortNode.addSelectors(Visitors.getSelectorsReferencedBy(ordering));
341 }
342
343 sortNode.addLastChild(plan);
344 return sortNode;
345 }
346
347 /**
348 * Attach a LIMIT node at the top of the plan tree.
349 *
350 * @param context the context in which the query is being planned
351 * @param plan the existing plan
352 * @param limit the limit definition; may be null
353 * @return the updated plan, or the existing plan if there were no limits
354 */
355 protected PlanNode attachLimits( QueryContext context,
356 PlanNode plan,
357 Limit limit ) {
358 if (limit.isUnlimited()) return plan;
359 context.getHints().hasLimit = true;
360 PlanNode limitNode = new PlanNode(Type.LIMIT);
361
362 boolean attach = false;
363 if (limit.offset() != 0) {
364 limitNode.setProperty(Property.LIMIT_OFFSET, limit.offset());
365 attach = true;
366 }
367 if (!limit.isUnlimited()) {
368 limitNode.setProperty(Property.LIMIT_COUNT, limit.rowLimit());
369 attach = true;
370 }
371 if (attach) {
372 limitNode.addLastChild(plan);
373 plan = limitNode;
374 }
375 return plan;
376 }
377
378 /**
379 * Attach a PROJECT node at the top of the plan tree.
380 *
381 * @param context the context in which the query is being planned
382 * @param plan the existing plan
383 * @param columns the columns being projected; may be null
384 * @param selectors the selectors keyed by their alias or name
385 * @return the updated plan
386 */
387 protected PlanNode attachProject( QueryContext context,
388 PlanNode plan,
389 List<? extends Column> columns,
390 Map<SelectorName, Table> selectors ) {
391 PlanNode projectNode = new PlanNode(Type.PROJECT);
392
393 List<Column> newColumns = new LinkedList<Column>();
394 List<String> newTypes = new ArrayList<String>();
395 if (columns == null || columns.isEmpty()) {
396 // SELECT *, so find all of the columns that are available from all the sources ...
397 for (Map.Entry<SelectorName, Table> entry : selectors.entrySet()) {
398 SelectorName tableName = entry.getKey();
399 Table table = entry.getValue();
400 // Add the selector that is being used ...
401 projectNode.addSelector(tableName);
402 // Compute the columns from this selector ...
403 allColumnsFor(table, tableName, newColumns, newTypes);
404 }
405 } else {
406 // Add the selector used by each column ...
407 for (Column column : columns) {
408 SelectorName tableName = column.selectorName();
409 // Add the selector that is being used ...
410 projectNode.addSelector(tableName);
411
412 // Verify that each column is available in the appropriate source ...
413 Table table = selectors.get(tableName);
414 if (table == null) {
415 context.getProblems().addError(GraphI18n.tableDoesNotExist, tableName);
416 } else {
417 // Make sure that the column is in the table ...
418 String columnName = column.propertyName();
419 if ("*".equals(columnName)) {
420 // This is a 'SELECT *' on this source, but this source is one of multiple sources ...
421 allColumnsFor(table, tableName, newColumns, newTypes);
422 } else {
423 // This is a particular column, so add it ...
424 newColumns.add(column);
425 org.modeshape.graph.query.validate.Schemata.Column schemaColumn = table.getColumn(columnName);
426 if (schemaColumn != null) {
427 newTypes.add(schemaColumn.getPropertyType());
428 } else {
429 newTypes.add(context.getTypeSystem().getStringFactory().getTypeName());
430 }
431 }
432 if (table.getColumn(columnName) == null && context.getHints().validateColumnExistance) {
433 context.getProblems().addError(GraphI18n.columnDoesNotExistOnTable, columnName, tableName);
434 }
435 }
436 }
437 }
438 projectNode.setProperty(Property.PROJECT_COLUMNS, newColumns);
439 projectNode.setProperty(Property.PROJECT_COLUMN_TYPES, newTypes);
440 projectNode.addLastChild(plan);
441 return projectNode;
442 }
443
444 protected void allColumnsFor( Table table,
445 SelectorName tableName,
446 List<Column> columns,
447 List<String> columnTypes ) {
448 // Compute the columns from this selector ...
449 for (Schemata.Column column : table.getColumns()) {
450 String columnName = column.getName();
451 String propertyName = columnName;
452 Column newColumn = new Column(tableName, propertyName, columnName);
453 columns.add(newColumn);
454 columnTypes.add(column.getPropertyType());
455 }
456 }
457
458 /**
459 * Attach DUP_REMOVE node at top of tree. The DUP_REMOVE may be pushed down to a source (or sources) if possible by the
460 * optimizer.
461 *
462 * @param context the context in which the query is being planned
463 * @param plan the existing plan
464 * @return the updated plan
465 */
466 protected PlanNode attachDuplicateRemoval( QueryContext context,
467 PlanNode plan ) {
468 PlanNode dupNode = new PlanNode(Type.DUP_REMOVE);
469 plan.setParent(dupNode);
470 return dupNode;
471 }
472
473 }