1 /*
2 * ModeShape (http://www.modeshape.org)
3 * See the COPYRIGHT.txt file distributed with this work for information
4 * regarding copyright ownership. Some portions may be licensed
5 * to Red Hat, Inc. under one or more contributor license agreements.
6 * See the AUTHORS.txt file in the distribution for a full listing of
7 * individual contributors.
8 *
9 * ModeShape is free software. Unless otherwise indicated, all code in ModeShape
10 * is licensed to you under the terms of the GNU Lesser General Public License as
11 * published by the Free Software Foundation; either version 2.1 of
12 * the License, or (at your option) any later version.
13 *
14 * ModeShape is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this software; if not, write to the Free
21 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
22 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
23 */
24 package org.modeshape.graph.query.optimize;
25
26 import java.util.Collections;
27 import java.util.EnumSet;
28 import java.util.HashSet;
29 import java.util.LinkedList;
30 import java.util.List;
31 import java.util.Set;
32 import net.jcip.annotations.Immutable;
33 import org.modeshape.graph.query.QueryContext;
34 import org.modeshape.graph.query.model.Constraint;
35 import org.modeshape.graph.query.model.JoinType;
36 import org.modeshape.graph.query.model.SelectorName;
37 import org.modeshape.graph.query.plan.PlanNode;
38 import org.modeshape.graph.query.plan.PlanNode.Property;
39 import org.modeshape.graph.query.plan.PlanNode.Type;
40
41 /**
42 * An {@link OptimizerRule optimizer rule} that attempts to push the criteria nodes in a canonical plan down as far as possible.
43 * <p>
44 * For example, here is a single-access plan before:
45 *
46 * <pre>
47 * ...
48 * |
49 * PROJECT with the list of columns being SELECTed
50 * |
51 * SELECT1
52 * | One or more SELECT plan nodes that each have
53 * SELECT2 a single non-join constraint that are then all AND-ed
54 * | together
55 * SELECTn
56 * |
57 * ACCESS
58 * |
59 * SOURCE
60 * </pre>
61 *
62 * And after:
63 *
64 * <pre>
65 * ...
66 * |
67 * ACCESS
68 * |
69 * PROJECT with the list of columns being SELECTed
70 * |
71 * SELECT1
72 * | One or more SELECT plan nodes that each have
73 * SELECT2 a single non-join constraint that are then all AND-ed
74 * | together
75 * SELECTn
76 * |
77 * SOURCE
78 * </pre>
79 *
80 * Here is another case, where multiple SELECT nodes above a simple JOIN and where each SELECT node applies to one or more of the
81 * SOURCE nodes (via the named selectors). Each SELECT node that applies to a single selector will get pushed toward that source,
82 * but will have the same order relative to other SELECT nodes also pushed toward that SOURCE. However, this rules does not push
83 * SELECT nodes that apply to multiple selectors.
84 * </p>
85 * <p>
86 * Before:
87 *
88 * <pre>
89 * ...
90 * |
91 * PROJECT ('s1','s2') with the list of columns being SELECTed (from 's1' and 's2' selectors)
92 * |
93 * SELECT1 ('s1')
94 * | One or more SELECT plan nodes that each have
95 * SELECT2 ('s2') a single non-join constraint that are then all AND-ed
96 * | together, and that each have the selector(s) they apply to
97 * SELECT3 ('s1','s2')
98 * |
99 * SELECT4 ('s1')
100 * |
101 * JOIN ('s1','s2')
102 * / \
103 * / \
104 * ACCESS ACCESS
105 * ('s1') ('s2')
106 * | |
107 * PROJECT PROJECT
108 * ('s1') ('s2')
109 * | |
110 * SOURCE SOURCE
111 * ('s1') ('s2')
112 * </pre>
113 *
114 * And after:
115 *
116 * <pre>
117 * ...
118 * |
119 * PROJECT ('s1','s2') with the list of columns being SELECTed (from 's1' and 's2' selectors)
120 * |
121 * SELECT3 ('s1','s2') Any SELECT plan nodes that apply to multiple selectors are left above
122 * | the ACCESS nodes.
123 * JOIN ('s1','s2')
124 * / \
125 * / \
126 * ACCESS ACCESS
127 * ('s1') ('s2')
128 * | |
129 * PROJECT PROJECT
130 * ('s1') ('s2')
131 * | |
132 * SELECT1 SELECT2
133 * ('s1') ('s2')
134 * | |
135 * SELECT4 SOURCE
136 * ('s1') ('s2')
137 * |
138 * SOURCE
139 * ('s1')
140 * </pre>
141 *
142 * </p>
143 * <p>
144 * Also, any SELECT that applies to one side of an equi-join will be applied to <i>both</i> sides of the JOIN.
145 * </p>
146 */
147 @Immutable
148 public class PushSelectCriteria implements OptimizerRule {
149
150 public static final PushSelectCriteria INSTANCE = new PushSelectCriteria();
151 private static final Set<Type> ORIGINATING_TYPES = Collections.unmodifiableSet(EnumSet.of(Type.NULL,
152 Type.SOURCE,
153 Type.JOIN,
154 Type.SET_OPERATION));
155
156 /**
157 * {@inheritDoc}
158 *
159 * @see org.modeshape.graph.query.optimize.OptimizerRule#execute(org.modeshape.graph.query.QueryContext,
160 * org.modeshape.graph.query.plan.PlanNode, java.util.LinkedList)
161 */
162 public PlanNode execute( QueryContext context,
163 PlanNode plan,
164 LinkedList<OptimizerRule> ruleStack ) {
165 // Create set of nodes that no longer need to be considered
166 Set<PlanNode> deadNodes = new HashSet<PlanNode>();
167
168 // Loop while criteria nodes are still being moved ...
169 boolean movedSomeNode = true;
170 while (movedSomeNode) {
171
172 // Reset flag to false for this iteration
173 movedSomeNode = false;
174
175 // Find all of the criteria (SELECT) nodes that can be pushed ...
176 List<PlanNode> criteriaNodes = plan.findAllAtOrBelow(Type.SELECT);
177
178 // Find all of the NULL, SOURCE, SET_OPERATION or JOIN nodes, ordered correctly; we'll use this
179 // to look for the node on which each criteria can apply ...
180 List<PlanNode> originatingNodes = plan.findAllAtOrBelow(ORIGINATING_TYPES);
181
182 // Starting with the lowest one first ...
183 Collections.reverse(criteriaNodes);
184 for (PlanNode criteriaNode : criteriaNodes) {
185 // Skip any node we've already tried and failed to move ...
186 if (deadNodes.contains(criteriaNode)) continue;
187
188 // Find the first originating node that has all of the required selectors for this criteria ...
189 PlanNode originatingNode = findOriginatingNode(criteriaNode, originatingNodes);
190 if (originatingNode == null || originatingNode == criteriaNode) {
191 deadNodes.add(criteriaNode);
192 continue;
193 }
194
195 // Try to push the criteria node down ...
196 if (!pushTowardsOriginatingNode(criteriaNode, originatingNode)) {
197 // criteria node could not be moved ...
198 deadNodes.add(criteriaNode);
199 continue;
200 }
201
202 // The criteria node was indeed moved, but it may need to be adjusted ...
203 boolean adjusted = false;
204 switch (originatingNode.getType()) {
205 case SOURCE:
206
207 break;
208 case JOIN:
209 if (!criteriaNode.hasAncestorOfType(Type.ACCESS)) {
210 // Try to push down the join criteria (only when above ACCESS nodes) ...
211 adjusted = pushDownJoinCriteria(criteriaNode, originatingNode);
212 }
213 break;
214 default:
215 // Nothing to change ...
216 }
217 if (adjusted) {
218 // We changed something, so make sure we go through the loop again ...
219 movedSomeNode = true;
220 } else {
221 // Nothing was changed from the push-down, so consider this criteria node as processed ...
222 deadNodes.add(criteriaNode);
223 }
224 }
225 }
226 return plan;
227 }
228
229 /**
230 * Attempt to push down criteria that applies to the JOIN as additional constraints on the JOIN itself.
231 *
232 * @param criteriaNode the SELECT node; may not be null
233 * @param joinNode the JOIN node; may not be null
234 * @return true if the criteria was pushed down, or false otherwise
235 */
236 protected boolean pushDownJoinCriteria( PlanNode criteriaNode,
237 PlanNode joinNode ) {
238 JoinType joinType = (JoinType)joinNode.getProperty(Property.JOIN_TYPE);
239
240 switch (joinType) {
241 case CROSS:
242 joinNode.setProperty(Property.JOIN_TYPE, JoinType.INNER);
243 moveCriteriaIntoOnClause(criteriaNode, joinNode);
244 break;
245 case INNER:
246 moveCriteriaIntoOnClause(criteriaNode, joinNode);
247 break;
248 default:
249 // This is where we could attempt to optimize the join type ...
250 // if (optimizeJoinType(criteriaNode, joinNode) == JoinType.INNER) {
251 // // The join type has changed ...
252 // moveCriteriaIntoOnClause(criteriaNode, joinNode);
253 // return true; // since the join type has changed ...
254 // }
255 }
256 return false;
257 }
258
259 /**
260 * Move the criteria that applies to the join to be included in the actual join criteria.
261 *
262 * @param criteriaNode the SELECT node; may not be null
263 * @param joinNode the JOIN node; may not be null
264 */
265 private void moveCriteriaIntoOnClause( PlanNode criteriaNode,
266 PlanNode joinNode ) {
267 List<Constraint> constraints = joinNode.getPropertyAsList(Property.JOIN_CONSTRAINTS, Constraint.class);
268 Constraint criteria = criteriaNode.getProperty(Property.SELECT_CRITERIA, Constraint.class);
269
270 // since the parser uses EMPTY_LIST, check for size 0 also
271 if (constraints == null || constraints.isEmpty()) {
272 constraints = new LinkedList<Constraint>();
273 joinNode.setProperty(Property.JOIN_CONSTRAINTS, constraints);
274 }
275
276 if (!constraints.contains(criteria)) {
277 constraints.add(criteria);
278 if (criteriaNode.hasBooleanProperty(Property.IS_DEPENDENT)) {
279 joinNode.setProperty(Property.IS_DEPENDENT, Boolean.TRUE);
280 }
281 }
282 criteriaNode.extractFromParent();
283 }
284
285 /**
286 * Find the first node that has all of the same {@link PlanNode#getSelectors() selectors} as the supplied criteria.
287 *
288 * @param criteriaNode the criteria
289 * @param originatingNodes the list of nodes to search
290 * @return the first (highest) node that is uses all of the same selectors as the criteria, or null if no such node could be
291 * found
292 */
293 protected PlanNode findOriginatingNode( PlanNode criteriaNode,
294 List<PlanNode> originatingNodes ) {
295 Set<SelectorName> requiredSelectors = criteriaNode.getSelectors();
296 if (requiredSelectors.isEmpty()) return criteriaNode;
297
298 // first look for originating nodes that exactly match the required selectors ...
299 for (PlanNode originatingNode : originatingNodes) {
300 if (!criteriaNode.isAbove(originatingNode)) continue;
301 if (originatingNode.getSelectors().equals(requiredSelectors)) return originatingNode;
302 }
303
304 // Nothing matched exactly, so can we push down to a node that contain all of the required selectors ...
305 for (PlanNode originatingNode : originatingNodes) {
306 if (originatingNode.getSelectors().containsAll(requiredSelectors)) return originatingNode;
307 }
308 return null;
309 }
310
311 /**
312 * Push the criteria node as close to the originating node as possible. In general, the criteria node usually ends up being
313 * moved to be a parent of the supplied originating node, except in a couple of cases:
314 * <ul>
315 * <li>There are already criteria nodes immediately above the originating node; in this case, the supplied criteria node is
316 * placed above all these existing criteria nodes.</li>
317 * <li>The originating node is below a JOIN node that is itself below an ACCESS node; in this case, the criteria node is
318 * placed immediately above the JOIN node.</li>
319 * <li>The originating node is below a LIMIT with a single SORT child node; in this case, the criteria node is placed
320 * immediately above the LIMIT node.</li>
321 * </ul>
322 *
323 * @param criteriaNode the criteria node that is to be pushed down; may not be null
324 * @param originatingNode the target node that represents the node above which the criteria node should be inserted; may not
325 * be null
326 * @return true if the criteria node was pushed down, or false if the criteria node could not be pushed down
327 */
328 protected boolean pushTowardsOriginatingNode( PlanNode criteriaNode,
329 PlanNode originatingNode ) {
330 // To keep things stable, 'originatingNode' should be the top-most SELECT (criteria) node above a SOURCE ...
331 while (originatingNode.getParent().getType() == Type.SELECT) {
332 originatingNode = originatingNode.getParent();
333 if (originatingNode == criteriaNode) return false;
334 }
335
336 // Find out the best node above which the criteria node should be placed ...
337 PlanNode bestChild = findBestChildForCriteria(criteriaNode, originatingNode);
338 if (bestChild == criteriaNode) return false;
339 criteriaNode.extractFromParent();
340 bestChild.insertAsParent(criteriaNode);
341 assert atBoundary(criteriaNode, originatingNode);
342 return true;
343 }
344
345 protected PlanNode findBestChildForCriteria( PlanNode criteriaNode,
346 PlanNode originatingNode ) {
347 // Walk the nodes, from the criteria node down to the originating node ...
348 for (PlanNode node : criteriaNode.getPathTo(originatingNode)) {
349 // Check the node to see if there is any reason why the node cannot be pushed
350 if (node.getType() == Type.JOIN) {
351 // Pushing below a JOIN is not necessary under an ACCESS node
352 if (node.hasAncestorOfType(Type.ACCESS)) return node;
353 } else if (node.getType() == Type.LIMIT) {
354 // Don't push below a LIMIT above a SORT ...
355 if (node.getChildCount() == 1 && node.getFirstChild().getType() == Type.SORT) {
356 return node;
357 }
358 }
359 }
360 return originatingNode;
361 }
362
363 /**
364 * Determine whether all of the nodes between the criteria node and its originating node are criteria (SELECT) nodes.
365 *
366 * @param criteriaNode the criteria node; may not be null
367 * @param originatingNode the originating node
368 * @return true if all nodes between the criteria and originating nodes are SELECT nodes
369 */
370 protected boolean atBoundary( PlanNode criteriaNode,
371 PlanNode originatingNode ) {
372 // Walk from source node to critNode to check each intervening node
373 PlanNode currentNode = originatingNode.getParent();
374 while (currentNode != criteriaNode) {
375 if (currentNode.getType() != Type.SELECT) return false;
376 currentNode = currentNode.getParent();
377 }
378 return true;
379 }
380
381 /**
382 * {@inheritDoc}
383 *
384 * @see java.lang.Object#toString()
385 */
386 @Override
387 public String toString() {
388 return getClass().getSimpleName();
389 }
390
391 }