View Javadoc

1   /*
2    * ModeShape (http://www.modeshape.org)
3    * See the COPYRIGHT.txt file distributed with this work for information
4    * regarding copyright ownership.  Some portions may be licensed
5    * to Red Hat, Inc. under one or more contributor license agreements.
6    * See the AUTHORS.txt file in the distribution for a full listing of 
7    * individual contributors.
8    *
9    * ModeShape is free software. Unless otherwise indicated, all code in ModeShape
10   * is licensed to you under the terms of the GNU Lesser General Public License as
11   * published by the Free Software Foundation; either version 2.1 of
12   * the License, or (at your option) any later version.
13   * 
14   * ModeShape is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17   * Lesser General Public License for more details.
18   *
19   * You should have received a copy of the GNU Lesser General Public
20   * License along with this software; if not, write to the Free
21   * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
22   * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
23   */
24  package org.modeshape.search.lucene.query;
25  
26  import java.io.IOException;
27  import java.util.Set;
28  import org.apache.lucene.document.Document;
29  import org.apache.lucene.document.FieldSelector;
30  import org.apache.lucene.document.FieldSelectorResult;
31  import org.apache.lucene.index.IndexReader;
32  import org.apache.lucene.search.Explanation;
33  import org.apache.lucene.search.Query;
34  import org.apache.lucene.search.Scorer;
35  import org.apache.lucene.search.Searcher;
36  import org.apache.lucene.search.Similarity;
37  import org.apache.lucene.search.Weight;
38  
39  /**
40   * A Lucene {@link Query} implementation that is used to score positively those documents that have a ID in the supplied set. This
41   * works for large sets of IDs; in smaller numbers, it may be more efficient to create a boolean query that checks for each of the
42   * IDs.
43   */
44  public class IdsQuery extends Query {
45  
46      private static final long serialVersionUID = 1L;
47  
48      /**
49       * The operand that is being negated by this query.
50       */
51      protected final Set<String> uuids;
52      protected final FieldSelector fieldSelector;
53      protected final String fieldName;
54  
55      /**
56       * Construct a {@link Query} implementation that scores nodes according to the supplied comparator.
57       * 
58       * @param fieldName the name of the document field containing the value; may not be null
59       * @param ids the set of ID values; may not be null
60       */
61      public IdsQuery( String fieldName,
62                       Set<String> ids ) {
63          this.fieldName = fieldName;
64          this.uuids = ids;
65          assert this.fieldName != null;
66          assert this.uuids != null;
67          this.fieldSelector = new FieldSelector() {
68              private static final long serialVersionUID = 1L;
69  
70              public FieldSelectorResult accept( String fieldName ) {
71                  return fieldName.equals(fieldName) ? FieldSelectorResult.LOAD_AND_BREAK : FieldSelectorResult.NO_LOAD;
72              }
73          };
74      }
75  
76      protected boolean includeDocument( IndexReader reader,
77                                         int docId ) throws IOException {
78          Document doc = reader.document(docId, fieldSelector);
79          String valueString = doc.get(fieldName);
80          return valueString != null && uuids.contains(valueString);
81      }
82  
83      /**
84       * {@inheritDoc}
85       * 
86       * @see org.apache.lucene.search.Query#createWeight(org.apache.lucene.search.Searcher)
87       */
88      @Override
89      public Weight createWeight( Searcher searcher ) {
90          return new IdSetWeight(searcher);
91      }
92  
93      /**
94       * {@inheritDoc}
95       * 
96       * @see org.apache.lucene.search.Query#toString(java.lang.String)
97       */
98      @Override
99      public String toString( String field ) {
100         return fieldName + " IN UUIDs";
101     }
102 
103     /**
104      * Calculates query weights and builds query scores for our NOT queries.
105      */
106     protected class IdSetWeight extends Weight {
107         private static final long serialVersionUID = 1L;
108         private final Searcher searcher;
109 
110         protected IdSetWeight( Searcher searcher ) {
111             this.searcher = searcher;
112             assert this.searcher != null;
113         }
114 
115         /**
116          * {@inheritDoc}
117          * 
118          * @see org.apache.lucene.search.Weight#getQuery()
119          */
120         @Override
121         public Query getQuery() {
122             return IdsQuery.this;
123         }
124 
125         /**
126          * {@inheritDoc}
127          * <p>
128          * This implementation always returns a weight factor of 1.0.
129          * </p>
130          * 
131          * @see org.apache.lucene.search.Weight#getValue()
132          */
133         @Override
134         public float getValue() {
135             return 1.0f; // weight factor of 1.0
136         }
137 
138         /**
139          * {@inheritDoc}
140          * <p>
141          * This implementation always returns a normalization factor of 1.0.
142          * </p>
143          * 
144          * @see org.apache.lucene.search.Weight#sumOfSquaredWeights()
145          */
146         @Override
147         public float sumOfSquaredWeights() {
148             return 1.0f; // normalization factor of 1.0
149         }
150 
151         /**
152          * {@inheritDoc}
153          * <p>
154          * This implementation always does nothing, as there is nothing to normalize.
155          * </p>
156          * 
157          * @see org.apache.lucene.search.Weight#normalize(float)
158          */
159         @Override
160         public void normalize( float norm ) {
161             // No need to do anything here
162         }
163 
164         /**
165          * {@inheritDoc}
166          * 
167          * @see org.apache.lucene.search.Weight#scorer(org.apache.lucene.index.IndexReader, boolean, boolean)
168          */
169         @Override
170         public Scorer scorer( IndexReader reader,
171                               boolean scoreDocsInOrder,
172                               boolean topScorer ) {
173             // Return a custom scorer ...
174             return new IdScorer(reader);
175         }
176 
177         /**
178          * {@inheritDoc}
179          * 
180          * @see org.apache.lucene.search.Weight#explain(org.apache.lucene.index.IndexReader, int)
181          */
182         @Override
183         public Explanation explain( IndexReader reader,
184                                     int doc ) {
185             return new Explanation(getValue(), getQuery().toString());
186         }
187     }
188 
189     /**
190      * A scorer for the Path query.
191      */
192     protected class IdScorer extends Scorer {
193         private int docId = -1;
194         private final int pastMaxDocId;
195         private final IndexReader reader;
196 
197         protected IdScorer( IndexReader reader ) {
198             // We don't care which Similarity we have, because we don't use it. So get the default.
199             super(Similarity.getDefault());
200             this.reader = reader;
201             assert this.reader != null;
202             this.pastMaxDocId = this.reader.maxDoc();
203         }
204 
205         /**
206          * {@inheritDoc}
207          * 
208          * @see org.apache.lucene.search.DocIdSetIterator#docID()
209          */
210         @Override
211         public int docID() {
212             return docId;
213         }
214 
215         /**
216          * {@inheritDoc}
217          * 
218          * @see org.apache.lucene.search.DocIdSetIterator#nextDoc()
219          */
220         @Override
221         public int nextDoc() throws IOException {
222             do {
223                 ++docId;
224                 if (docId == pastMaxDocId) return Scorer.NO_MORE_DOCS;
225                 if (reader.isDeleted(docId)) {
226                     // We should skip this document ...
227                     continue;
228                 }
229                 if (includeDocument(reader, docId)) return docId;
230             } while (true);
231         }
232 
233         /**
234          * {@inheritDoc}
235          * 
236          * @see org.apache.lucene.search.DocIdSetIterator#advance(int)
237          */
238         @Override
239         public int advance( int target ) throws IOException {
240             if (target == Scorer.NO_MORE_DOCS) return target;
241             while (true) {
242                 int doc = nextDoc();
243                 if (doc >= target) return doc;
244             }
245         }
246 
247         /**
248          * {@inheritDoc}
249          * <p>
250          * This method always returns a score of 1.0 for the current document, since only those documents that satisfy the NOT are
251          * scored by this scorer.
252          * </p>
253          * 
254          * @see org.apache.lucene.search.Scorer#score()
255          */
256         @Override
257         public float score() {
258             return 1.0f;
259         }
260     }
261 }