View Javadoc

1   /*
2    * ModeShape (http://www.modeshape.org)
3    * See the COPYRIGHT.txt file distributed with this work for information
4    * regarding copyright ownership.  Some portions may be licensed
5    * to Red Hat, Inc. under one or more contributor license agreements.
6    * See the AUTHORS.txt file in the distribution for a full listing of 
7    * individual contributors.
8    *
9    * ModeShape is free software. Unless otherwise indicated, all code in ModeShape
10   * is licensed to you under the terms of the GNU Lesser General Public License as
11   * published by the Free Software Foundation; either version 2.1 of
12   * the License, or (at your option) any later version.
13   * 
14   * ModeShape is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17   * Lesser General Public License for more details.
18   *
19   * You should have received a copy of the GNU Lesser General Public
20   * License along with this software; if not, write to the Free
21   * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
22   * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
23   */
24  package org.modeshape.graph.query.optimize;
25  
26  import java.util.Collections;
27  import java.util.EnumSet;
28  import java.util.HashSet;
29  import java.util.LinkedList;
30  import java.util.List;
31  import java.util.Set;
32  import net.jcip.annotations.Immutable;
33  import org.modeshape.graph.query.QueryContext;
34  import org.modeshape.graph.query.model.Constraint;
35  import org.modeshape.graph.query.model.JoinType;
36  import org.modeshape.graph.query.model.SelectorName;
37  import org.modeshape.graph.query.plan.PlanNode;
38  import org.modeshape.graph.query.plan.PlanNode.Property;
39  import org.modeshape.graph.query.plan.PlanNode.Type;
40  
41  /**
42   * An {@link OptimizerRule optimizer rule} that attempts to push the criteria nodes in a canonical plan down as far as possible.
43   * <p>
44   * For example, here is a single-access plan before:
45   * 
46   * <pre>
47   *          ...
48   *           |
49   *        PROJECT      with the list of columns being SELECTed
50   *           |
51   *        SELECT1
52   *           |         One or more SELECT plan nodes that each have
53   *        SELECT2      a single non-join constraint that are then all AND-ed
54   *           |         together
55   *        SELECTn
56   *           |
57   *        ACCESS
58   *           |
59   *        SOURCE
60   * </pre>
61   * 
62   * And after:
63   * 
64   * <pre>
65   *          ...
66   *           |
67   *        ACCESS
68   *           |
69   *        PROJECT      with the list of columns being SELECTed
70   *           |
71   *        SELECT1
72   *           |         One or more SELECT plan nodes that each have
73   *        SELECT2      a single non-join constraint that are then all AND-ed
74   *           |         together
75   *        SELECTn
76   *           |
77   *        SOURCE
78   * </pre>
79   * 
80   * Here is another case, where multiple SELECT nodes above a simple JOIN and where each SELECT node applies to one or more of the
81   * SOURCE nodes (via the named selectors). Each SELECT node that applies to a single selector will get pushed toward that source,
82   * but will have the same order relative to other SELECT nodes also pushed toward that SOURCE. However, this rules does not push
83   * SELECT nodes that apply to multiple selectors.
84   * </p>
85   * <p>
86   * Before:
87   * 
88   * <pre>
89   *          ...
90   *           |
91   *        PROJECT ('s1','s2')      with the list of columns being SELECTed (from 's1' and 's2' selectors)
92   *           |
93   *        SELECT1 ('s1')
94   *           |                     One or more SELECT plan nodes that each have
95   *        SELECT2 ('s2')           a single non-join constraint that are then all AND-ed
96   *           |                     together, and that each have the selector(s) they apply to
97   *        SELECT3 ('s1','s2')
98   *           |
99   *        SELECT4 ('s1')
100  *           |
101  *         JOIN ('s1','s2')
102  *        /     \
103  *       /       \
104  *   ACCESS     ACCESS
105  *    ('s1')    ('s2')
106  *     |           |
107  *   PROJECT    PROJECT
108  *    ('s1')    ('s2')
109  *     |           |
110  *   SOURCE     SOURCE
111  *    ('s1')    ('s2')
112  * </pre>
113  * 
114  * And after:
115  * 
116  * <pre>
117  *          ...
118  *           |
119  *        PROJECT ('s1','s2')      with the list of columns being SELECTed (from 's1' and 's2' selectors)
120  *           |
121  *        SELECT3 ('s1','s2')      Any SELECT plan nodes that apply to multiple selectors are left above
122  *           |                     the ACCESS nodes.
123  *         JOIN ('s1','s2')
124  *        /     \
125  *       /       \
126  *   ACCESS     ACCESS
127  *   ('s1')     ('s2')
128  *     |           |
129  *  PROJECT     PROJECT
130  *   ('s1')     ('s2')
131  *     |           |
132  *  SELECT1     SELECT2
133  *   ('s1')     ('s2')
134  *     |           |
135  *  SELECT4     SOURCE
136  *   ('s1')     ('s2')
137  *     |
138  *   SOURCE
139  *   ('s1')
140  * </pre>
141  * 
142  * </p>
143  * <p>
144  * Also, any SELECT that applies to one side of an equi-join will be applied to <i>both</i> sides of the JOIN.
145  * </p>
146  */
147 @Immutable
148 public class PushSelectCriteria implements OptimizerRule {
149 
150     public static final PushSelectCriteria INSTANCE = new PushSelectCriteria();
151     private static final Set<Type> ORIGINATING_TYPES = Collections.unmodifiableSet(EnumSet.of(Type.NULL,
152                                                                                               Type.SOURCE,
153                                                                                               Type.JOIN,
154                                                                                               Type.SET_OPERATION));
155 
156     /**
157      * {@inheritDoc}
158      * 
159      * @see org.modeshape.graph.query.optimize.OptimizerRule#execute(org.modeshape.graph.query.QueryContext,
160      *      org.modeshape.graph.query.plan.PlanNode, java.util.LinkedList)
161      */
162     public PlanNode execute( QueryContext context,
163                              PlanNode plan,
164                              LinkedList<OptimizerRule> ruleStack ) {
165         // Create set of nodes that no longer need to be considered
166         Set<PlanNode> deadNodes = new HashSet<PlanNode>();
167 
168         // Loop while criteria nodes are still being moved ...
169         boolean movedSomeNode = true;
170         while (movedSomeNode) {
171 
172             // Reset flag to false for this iteration
173             movedSomeNode = false;
174 
175             // Find all of the criteria (SELECT) nodes that can be pushed ...
176             List<PlanNode> criteriaNodes = plan.findAllAtOrBelow(Type.SELECT);
177 
178             // Find all of the NULL, SOURCE, SET_OPERATION or JOIN nodes, ordered correctly; we'll use this
179             // to look for the node on which each criteria can apply ...
180             List<PlanNode> originatingNodes = plan.findAllAtOrBelow(ORIGINATING_TYPES);
181 
182             // Starting with the lowest one first ...
183             Collections.reverse(criteriaNodes);
184             for (PlanNode criteriaNode : criteriaNodes) {
185                 // Skip any node we've already tried and failed to move ...
186                 if (deadNodes.contains(criteriaNode)) continue;
187 
188                 // Find the first originating node that has all of the required selectors for this criteria ...
189                 PlanNode originatingNode = findOriginatingNode(criteriaNode, originatingNodes);
190                 if (originatingNode == null || originatingNode == criteriaNode) {
191                     deadNodes.add(criteriaNode);
192                     continue;
193                 }
194 
195                 // Try to push the criteria node down ...
196                 if (!pushTowardsOriginatingNode(criteriaNode, originatingNode)) {
197                     // criteria node could not be moved ...
198                     deadNodes.add(criteriaNode);
199                     continue;
200                 }
201 
202                 // The criteria node was indeed moved, but it may need to be adjusted ...
203                 boolean adjusted = false;
204                 switch (originatingNode.getType()) {
205                     case SOURCE:
206 
207                         break;
208                     case JOIN:
209                         if (!criteriaNode.hasAncestorOfType(Type.ACCESS)) {
210                             // Try to push down the join criteria (only when above ACCESS nodes) ...
211                             adjusted = pushDownJoinCriteria(criteriaNode, originatingNode);
212                         }
213                         break;
214                     default:
215                         // Nothing to change ...
216                 }
217                 if (adjusted) {
218                     // We changed something, so make sure we go through the loop again ...
219                     movedSomeNode = true;
220                 } else {
221                     // Nothing was changed from the push-down, so consider this criteria node as processed ...
222                     deadNodes.add(criteriaNode);
223                 }
224             }
225         }
226         return plan;
227     }
228 
229     /**
230      * Attempt to push down criteria that applies to the JOIN as additional constraints on the JOIN itself.
231      * 
232      * @param criteriaNode the SELECT node; may not be null
233      * @param joinNode the JOIN node; may not be null
234      * @return true if the criteria was pushed down, or false otherwise
235      */
236     protected boolean pushDownJoinCriteria( PlanNode criteriaNode,
237                                             PlanNode joinNode ) {
238         JoinType joinType = (JoinType)joinNode.getProperty(Property.JOIN_TYPE);
239 
240         switch (joinType) {
241             case CROSS:
242                 joinNode.setProperty(Property.JOIN_TYPE, JoinType.INNER);
243                 moveCriteriaIntoOnClause(criteriaNode, joinNode);
244                 break;
245             case INNER:
246                 moveCriteriaIntoOnClause(criteriaNode, joinNode);
247                 break;
248             default:
249                 // This is where we could attempt to optimize the join type ...
250                 // if (optimizeJoinType(criteriaNode, joinNode) == JoinType.INNER) {
251                 // // The join type has changed ...
252                 // moveCriteriaIntoOnClause(criteriaNode, joinNode);
253                 // return true; // since the join type has changed ...
254                 // }
255         }
256         return false;
257     }
258 
259     /**
260      * Move the criteria that applies to the join to be included in the actual join criteria.
261      * 
262      * @param criteriaNode the SELECT node; may not be null
263      * @param joinNode the JOIN node; may not be null
264      */
265     private void moveCriteriaIntoOnClause( PlanNode criteriaNode,
266                                            PlanNode joinNode ) {
267         List<Constraint> constraints = joinNode.getPropertyAsList(Property.JOIN_CONSTRAINTS, Constraint.class);
268         Constraint criteria = criteriaNode.getProperty(Property.SELECT_CRITERIA, Constraint.class);
269 
270         // since the parser uses EMPTY_LIST, check for size 0 also
271         if (constraints == null || constraints.isEmpty()) {
272             constraints = new LinkedList<Constraint>();
273             joinNode.setProperty(Property.JOIN_CONSTRAINTS, constraints);
274         }
275 
276         if (!constraints.contains(criteria)) {
277             constraints.add(criteria);
278             if (criteriaNode.hasBooleanProperty(Property.IS_DEPENDENT)) {
279                 joinNode.setProperty(Property.IS_DEPENDENT, Boolean.TRUE);
280             }
281         }
282         criteriaNode.extractFromParent();
283     }
284 
285     /**
286      * Find the first node that has all of the same {@link PlanNode#getSelectors() selectors} as the supplied criteria.
287      * 
288      * @param criteriaNode the criteria
289      * @param originatingNodes the list of nodes to search
290      * @return the first (highest) node that is uses all of the same selectors as the criteria, or null if no such node could be
291      *         found
292      */
293     protected PlanNode findOriginatingNode( PlanNode criteriaNode,
294                                             List<PlanNode> originatingNodes ) {
295         Set<SelectorName> requiredSelectors = criteriaNode.getSelectors();
296         if (requiredSelectors.isEmpty()) return criteriaNode;
297 
298         // first look for originating nodes that exactly match the required selectors ...
299         for (PlanNode originatingNode : originatingNodes) {
300             if (!criteriaNode.isAbove(originatingNode)) continue;
301             if (originatingNode.getSelectors().equals(requiredSelectors)) return originatingNode;
302         }
303 
304         // Nothing matched exactly, so can we push down to a node that contain all of the required selectors ...
305         for (PlanNode originatingNode : originatingNodes) {
306             if (originatingNode.getSelectors().containsAll(requiredSelectors)) return originatingNode;
307         }
308         return null;
309     }
310 
311     /**
312      * Push the criteria node as close to the originating node as possible. In general, the criteria node usually ends up being
313      * moved to be a parent of the supplied originating node, except in a couple of cases:
314      * <ul>
315      * <li>There are already criteria nodes immediately above the originating node; in this case, the supplied criteria node is
316      * placed above all these existing criteria nodes.</li>
317      * <li>The originating node is below a JOIN node that is itself below an ACCESS node; in this case, the criteria node is
318      * placed immediately above the JOIN node.</li>
319      * <li>The originating node is below a LIMIT with a single SORT child node; in this case, the criteria node is placed
320      * immediately above the LIMIT node.</li>
321      * </ul>
322      * 
323      * @param criteriaNode the criteria node that is to be pushed down; may not be null
324      * @param originatingNode the target node that represents the node above which the criteria node should be inserted; may not
325      *        be null
326      * @return true if the criteria node was pushed down, or false if the criteria node could not be pushed down
327      */
328     protected boolean pushTowardsOriginatingNode( PlanNode criteriaNode,
329                                                   PlanNode originatingNode ) {
330         // To keep things stable, 'originatingNode' should be the top-most SELECT (criteria) node above a SOURCE ...
331         while (originatingNode.getParent().getType() == Type.SELECT) {
332             originatingNode = originatingNode.getParent();
333             if (originatingNode == criteriaNode) return false;
334         }
335 
336         // Find out the best node above which the criteria node should be placed ...
337         PlanNode bestChild = findBestChildForCriteria(criteriaNode, originatingNode);
338         if (bestChild == criteriaNode) return false;
339         criteriaNode.extractFromParent();
340         bestChild.insertAsParent(criteriaNode);
341         assert atBoundary(criteriaNode, originatingNode);
342         return true;
343     }
344 
345     protected PlanNode findBestChildForCriteria( PlanNode criteriaNode,
346                                                  PlanNode originatingNode ) {
347         // Walk the nodes, from the criteria node down to the originating node ...
348         for (PlanNode node : criteriaNode.getPathTo(originatingNode)) {
349             // Check the node to see if there is any reason why the node cannot be pushed
350             if (node.getType() == Type.JOIN) {
351                 // Pushing below a JOIN is not necessary under an ACCESS node
352                 if (node.hasAncestorOfType(Type.ACCESS)) return node;
353             } else if (node.getType() == Type.LIMIT) {
354                 // Don't push below a LIMIT above a SORT ...
355                 if (node.getChildCount() == 1 && node.getFirstChild().getType() == Type.SORT) {
356                     return node;
357                 }
358             }
359         }
360         return originatingNode;
361     }
362 
363     /**
364      * Determine whether all of the nodes between the criteria node and its originating node are criteria (SELECT) nodes.
365      * 
366      * @param criteriaNode the criteria node; may not be null
367      * @param originatingNode the originating node
368      * @return true if all nodes between the criteria and originating nodes are SELECT nodes
369      */
370     protected boolean atBoundary( PlanNode criteriaNode,
371                                   PlanNode originatingNode ) {
372         // Walk from source node to critNode to check each intervening node
373         PlanNode currentNode = originatingNode.getParent();
374         while (currentNode != criteriaNode) {
375             if (currentNode.getType() != Type.SELECT) return false;
376             currentNode = currentNode.getParent();
377         }
378         return true;
379     }
380 
381     /**
382      * {@inheritDoc}
383      * 
384      * @see java.lang.Object#toString()
385      */
386     @Override
387     public String toString() {
388         return getClass().getSimpleName();
389     }
390 
391 }