1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 package org.modeshape.search.lucene;
25
26 import java.io.File;
27 import java.text.DateFormat;
28 import java.text.SimpleDateFormat;
29 import java.util.HashMap;
30 import java.util.Iterator;
31 import java.util.LinkedList;
32 import java.util.List;
33 import java.util.Map;
34 import net.jcip.annotations.Immutable;
35 import org.apache.lucene.analysis.Analyzer;
36 import org.apache.lucene.analysis.standard.StandardAnalyzer;
37 import org.apache.lucene.document.Field;
38 import org.apache.lucene.util.Version;
39 import org.modeshape.common.text.FilenameEncoder;
40 import org.modeshape.common.text.TextEncoder;
41 import org.modeshape.common.text.UrlEncoder;
42 import org.modeshape.common.util.CheckArg;
43 import org.modeshape.graph.ExecutionContext;
44 import org.modeshape.graph.JcrLexicon;
45 import org.modeshape.graph.Location;
46 import org.modeshape.graph.ModeShapeLexicon;
47 import org.modeshape.graph.connector.RepositoryConnectionFactory;
48 import org.modeshape.graph.observe.Observer;
49 import org.modeshape.graph.property.Name;
50 import org.modeshape.graph.property.Path;
51 import org.modeshape.graph.property.Property;
52 import org.modeshape.graph.property.basic.JodaDateTime;
53 import org.modeshape.graph.request.ChangeRequest;
54 import org.modeshape.graph.request.CreateNodeRequest;
55 import org.modeshape.graph.request.DestroyWorkspaceRequest;
56 import org.modeshape.graph.request.RemovePropertyRequest;
57 import org.modeshape.graph.request.RequestType;
58 import org.modeshape.graph.request.SetPropertyRequest;
59 import org.modeshape.graph.request.UpdatePropertiesRequest;
60 import org.modeshape.graph.request.UpdateValuesRequest;
61 import org.modeshape.graph.search.SearchEngine;
62 import org.modeshape.graph.search.SearchEngineException;
63 import org.modeshape.graph.search.SearchEngineIndexer;
64
65
66
67
68
69
70
71 public class LuceneSearchEngine extends AbstractLuceneSearchEngine<LuceneSearchWorkspace, LuceneSearchProcessor> {
72
73
74
75
76
77
78
79
/**
 * The {@link IndexRules} used by an engine when the caller does not supply its own rules.
 */
public static final IndexRules DEFAULT_RULES;

static {
    // Value handed to the two date fields registered below. It is built from the components
    // (2009, 11, 1, 0, 0, 0, 0), presumably midnight on 1 November 2009.
    // NOTE(review): presumably the earliest creation/modification date any content is expected to have - confirm
    // against IndexRules.Builder.dateField.
    final long earliestChange = new JodaDateTime(2009, 11, 1, 0, 0, 0, 0).getMilliseconds();

    final IndexRules.Builder rulesBuilder = IndexRules.createBuilder();

    // Fallback rule: fields are stored and analyzed
    rulesBuilder.defaultTo(Field.Store.YES, Field.Index.ANALYZED, true, true);

    // The UUID fields are stored but are not analyzed
    rulesBuilder.stringField(JcrLexicon.UUID, Field.Store.YES, Field.Index.NOT_ANALYZED, false, false);
    rulesBuilder.stringField(ModeShapeLexicon.UUID, Field.Store.YES, Field.Index.NOT_ANALYZED, false, false);

    // The creation and last-modification dates are stored, not analyzed, and use the value computed above
    rulesBuilder.dateField(JcrLexicon.CREATED, Field.Store.YES, Field.Index.NOT_ANALYZED, earliestChange);
    rulesBuilder.dateField(JcrLexicon.LAST_MODIFIED, Field.Store.YES, Field.Index.NOT_ANALYZED, earliestChange);

    DEFAULT_RULES = rulesBuilder.build();
}
98
99
100
101
102
103 protected static final TextEncoder DEFAULT_ENCODER = new FilenameEncoder();
104
105
106 protected ThreadLocal<DateFormat> dateFormatter = new ThreadLocal<DateFormat>() {
107 @Override
108 protected DateFormat initialValue() {
109 return new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss");
110 }
111 };
112
113 private final LuceneConfiguration configuration;
114 private final IndexRules rules;
115 private final Analyzer analyzer;
116
117
118
119
120
121
122
123
124
125
126
127
128
129
/**
 * Create a search engine that uses the supplied Lucene configuration.
 * 
 * @param sourceName the name of the source; passed to the superclass
 * @param connectionFactory the connection factory; passed to the superclass
 * @param verifyWorkspaceInSource passed to the superclass unchanged
 * @param configuration the Lucene configuration; may not be null
 * @param rules the index rules, or null if the {@link #DEFAULT_RULES} should be used
 * @param analyzer the analyzer, or null if a {@link StandardAnalyzer} for {@link Version#LUCENE_30} should be used
 */
public LuceneSearchEngine( String sourceName,
                           RepositoryConnectionFactory connectionFactory,
                           boolean verifyWorkspaceInSource,
                           LuceneConfiguration configuration,
                           IndexRules rules,
                           Analyzer analyzer ) {
    super(sourceName, connectionFactory, verifyWorkspaceInSource);
    CheckArg.isNotNull(configuration, "configuration");
    this.configuration = configuration;

    // Fall back to the defaults for whichever optional collaborators were not supplied
    if (analyzer == null) {
        this.analyzer = new StandardAnalyzer(Version.LUCENE_30);
    } else {
        this.analyzer = analyzer;
    }
    if (rules == null) {
        this.rules = DEFAULT_RULES;
    } else {
        this.rules = rules;
    }
}
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
/**
 * Create a search engine whose Lucene configuration is built from the supplied index storage directory, using the
 * {@link #DEFAULT_ENCODER} for both encoders.
 * <p>
 * The supplied rules and analyzer are passed on to the primary constructor. (Previously they were silently replaced with
 * null, so the defaults were always used no matter what the caller supplied.)
 * </p>
 * 
 * @param sourceName the name of the source; passed to the superclass
 * @param connectionFactory the connection factory; passed to the superclass
 * @param verifyWorkspaceInSource passed to the superclass unchanged
 * @param indexStorageDirectory the directory handed to {@link LuceneConfigurations} to build the configuration
 * @param rules the index rules, or null if the {@link #DEFAULT_RULES} should be used
 * @param analyzer the analyzer, or null if the default analyzer should be used
 */
public LuceneSearchEngine( String sourceName,
                           RepositoryConnectionFactory connectionFactory,
                           boolean verifyWorkspaceInSource,
                           File indexStorageDirectory,
                           IndexRules rules,
                           Analyzer analyzer ) {
    // NOTE(review): the second argument to 'using' is deliberately left null, as before - confirm its meaning
    // against LuceneConfigurations.using
    this(sourceName, connectionFactory, verifyWorkspaceInSource, LuceneConfigurations.using(indexStorageDirectory,
                                                                                            null,
                                                                                            DEFAULT_ENCODER,
                                                                                            DEFAULT_ENCODER), rules, analyzer);
}
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
/**
 * Create a search engine that uses the configuration returned by {@link LuceneConfigurations#inMemory()}.
 * <p>
 * The supplied rules and analyzer are passed on to the primary constructor. (Previously they were silently replaced with
 * null, so the defaults were always used no matter what the caller supplied.)
 * </p>
 * 
 * @param sourceName the name of the source; passed to the superclass
 * @param connectionFactory the connection factory; passed to the superclass
 * @param verifyWorkspaceInSource passed to the superclass unchanged
 * @param rules the index rules, or null if the {@link #DEFAULT_RULES} should be used
 * @param analyzer the analyzer, or null if the default analyzer should be used
 */
public LuceneSearchEngine( String sourceName,
                           RepositoryConnectionFactory connectionFactory,
                           boolean verifyWorkspaceInSource,
                           IndexRules rules,
                           Analyzer analyzer ) {
    this(sourceName, connectionFactory, verifyWorkspaceInSource, LuceneConfigurations.inMemory(), rules, analyzer);
}
207
208
209
210
211
212
213
214 @Override
215 protected LuceneSearchProcessor createProcessor( ExecutionContext context,
216 Workspaces<LuceneSearchWorkspace> workspaces,
217 Observer observer,
218 boolean readOnly ) {
219 return new LuceneSearchProcessor(getSourceName(), context, workspaces, observer, null, readOnly);
220 }
221
222
223
224
225
226
227
228 @Override
229 protected LuceneSearchWorkspace createWorkspace( ExecutionContext context,
230 String workspaceName ) throws SearchEngineException {
231 return new LuceneSearchWorkspace(workspaceName, configuration, rules, analyzer);
232 }
233
234
235
236
237
238
239 public void index( ExecutionContext context,
240 final Iterable<ChangeRequest> changes ) throws SearchEngineException {
241
242 WorkForWorkspaces allWork = new WorkForWorkspaces(context);
243 for (ChangeRequest change : changes) {
244 WorkspaceWork work = allWork.forWorkspace(change.changedWorkspace());
245 assert work != null;
246 if (!work.add(change)) break;
247 }
248
249
250 SearchEngineIndexer indexer = new SearchEngineIndexer(context, this, getConnectionFactory());
251 try {
252 for (WorkspaceWork workspaceWork : allWork) {
253 if (workspaceWork.indexAllContent) {
254 indexer.index(workspaceWork.workspaceName);
255 } else if (workspaceWork.deleteWorkspace) {
256 indexer.process(new DestroyWorkspaceRequest(workspaceWork.workspaceName));
257 } else {
258 for (WorkRequest request : workspaceWork) {
259 if (request instanceof CrawlSubgraph) {
260 CrawlSubgraph crawlRequest = (CrawlSubgraph)request;
261 Location location = crawlRequest.location;
262 indexer.index(workspaceWork.workspaceName, location, crawlRequest.depth);
263 } else if (request instanceof ForwardRequest) {
264 ForwardRequest forwardRequest = (ForwardRequest)request;
265 indexer.process(forwardRequest.changeRequest.clone());
266 }
267 }
268 }
269 }
270 } finally {
271 indexer.close();
272 }
273 }
274
275 protected static class WorkForWorkspaces implements Iterable<WorkspaceWork> {
276 private Map<String, WorkspaceWork> byWorkspaceName = new HashMap<String, WorkspaceWork>();
277 private final ExecutionContext context;
278
279 protected WorkForWorkspaces( ExecutionContext context ) {
280 this.context = context;
281 }
282
283 protected WorkspaceWork forWorkspace( String workspaceName ) {
284 WorkspaceWork work = byWorkspaceName.get(workspaceName);
285 if (work == null) {
286 work = new WorkspaceWork(context, workspaceName);
287 byWorkspaceName.put(workspaceName, work);
288 }
289 return work;
290 }
291
292 public Iterator<WorkspaceWork> iterator() {
293 return byWorkspaceName.values().iterator();
294 }
295
296
297
298
299
300
301 @Override
302 public String toString() {
303 StringBuilder sb = new StringBuilder();
304 for (WorkspaceWork work : byWorkspaceName.values()) {
305 sb.append(work.toString()).append('\n');
306 }
307 return sb.toString();
308 }
309 }
310
311 protected static class WorkspaceWork implements Iterable<WorkRequest> {
312 private final ExecutionContext context;
313 protected final String workspaceName;
314 protected final Map<Path, WorkRequest> requestByPath = new HashMap<Path, WorkRequest>();
315 protected boolean indexAllContent;
316 protected boolean deleteWorkspace;
317
318 protected WorkspaceWork( ExecutionContext context,
319 String workspaceName ) {
320 this.context = context;
321 this.workspaceName = workspaceName;
322 }
323
324 public Iterator<WorkRequest> iterator() {
325 return requestByPath.values().iterator();
326 }
327
328 protected boolean add( ChangeRequest change ) {
329 assert !indexAllContent;
330 assert !deleteWorkspace;
331
332 switch (change.getType()) {
333 case CLONE_WORKSPACE:
334 case CREATE_WORKSPACE:
335 requestByPath.clear();
336 indexAllContent = true;
337 return false;
338 case DESTROY_WORKSPACE:
339 requestByPath.clear();
340 deleteWorkspace = true;
341 return false;
342 default:
343
344 break;
345 }
346
347 Location changedLocation = change.changedLocation();
348 assert changedLocation.hasPath();
349 if (isCoveredByExistingCrawl(changedLocation.getPath())) {
350
351 return true;
352 }
353
354 switch (change.getType()) {
355 case UPDATE_PROPERTIES:
356
357 if (mergeWithExistingWork(changedLocation, change)) return true;
358
359
360 UpdatePropertiesRequest update = (UpdatePropertiesRequest)change;
361 if (update.removeOtherProperties()) {
362
363 forward(changedLocation, update);
364 return true;
365 }
366
367 crawl(update.changedLocation(), 1);
368 return true;
369 case SET_PROPERTY:
370
371 if (mergeWithExistingWork(changedLocation, change)) return true;
372
373
374 crawl(changedLocation, 1);
375 return true;
376 case UPDATE_VALUES:
377
378 if (mergeWithExistingWork(changedLocation, change)) return true;
379
380
381 crawl(changedLocation, 1);
382 return true;
383 case CREATE_NODE:
384 forward(changedLocation, change);
385 return true;
386 default:
387
388 break;
389 }
390
391
392 requestByPath.clear();
393 indexAllContent = true;
394 return false;
395 }
396
397 private boolean mergeWithExistingWork( Location location,
398 ChangeRequest change ) {
399 Path path = location.getPath();
400 WorkRequest existing = requestByPath.get(path);
401 if (existing instanceof CrawlSubgraph) {
402
403 return true;
404 }
405 if (existing instanceof ForwardRequest) {
406 ChangeRequest existingRequest = ((ForwardRequest)existing).changeRequest;
407
408 ChangeRequest merged = merge(context, existingRequest, change);
409 if (merged != null) {
410 forward(location, merged);
411 return true;
412 }
413 }
414
415 return false;
416 }
417
418 private void forward( Location location,
419 ChangeRequest request ) {
420 requestByPath.put(location.getPath(), new ForwardRequest(request));
421 }
422
423 private boolean isCoveredByExistingCrawl( Path path ) {
424
425 while (!path.isRoot()) {
426 path = path.getParent();
427 WorkRequest existing = requestByPath.get(path);
428 if (existing == null) continue;
429 if (existing instanceof CrawlSubgraph) {
430 CrawlSubgraph crawl = (CrawlSubgraph)existing;
431 if (crawl.depth != 1) {
432
433 return true;
434 }
435 }
436 }
437 return false;
438 }
439
440 private void crawl( Location location,
441 int depth ) {
442 Path path = location.getPath();
443
444 Iterator<Map.Entry<Path, WorkRequest>> iter = requestByPath.entrySet().iterator();
445 while (iter.hasNext()) {
446 if (iter.next().getKey().isDecendantOf(path)) iter.remove();
447 }
448 requestByPath.put(path, new CrawlSubgraph(location, depth));
449 }
450
451
452
453
454
455
456 @Override
457 public String toString() {
458 StringBuilder sb = new StringBuilder();
459 sb.append(" Workspace: ").append(workspaceName).append('\n');
460 for (Map.Entry<Path, WorkRequest> entry : requestByPath.entrySet()) {
461 sb.append(" ")
462 .append(entry.getKey().getString(context.getNamespaceRegistry()))
463 .append("->")
464 .append(entry.getValue().toString(context))
465 .append('\n');
466 }
467 return sb.toString();
468 }
469 }
470
471 protected static ChangeRequest merge( ExecutionContext context,
472 ChangeRequest original,
473 ChangeRequest change ) {
474 assert !original.hasError();
475 assert !change.hasError();
476 if (RequestType.CREATE_NODE == original.getType()) {
477 CreateNodeRequest create = (CreateNodeRequest)original;
478
479 Map<Name, Property> newProperties;
480 CreateNodeRequest newRequest;
481
482 switch (change.getType()) {
483 case CREATE_NODE:
484 SetPropertyRequest set = (SetPropertyRequest)change;
485 Name newPropertyName = set.property().getName();
486 List<Property> newPropertiesList = new LinkedList<Property>();
487 for (Property property : create.properties()) {
488 if (property.getName().equals(newPropertyName)) continue;
489 newPropertiesList.add(property);
490 }
491 newPropertiesList.add(set.property());
492 newRequest = new CreateNodeRequest(create.under(), create.inWorkspace(), create.named(),
493 create.conflictBehavior(), newPropertiesList);
494 newRequest.setActualLocationOfNode(create.getActualLocationOfNode());
495 return newRequest;
496
497 case UPDATE_PROPERTIES:
498 UpdatePropertiesRequest update = (UpdatePropertiesRequest)change;
499 newProperties = new HashMap<Name, Property>();
500 for (Property property : create.properties()) {
501 newProperties.put(property.getName(), property);
502 }
503 newProperties.putAll(update.properties());
504 newRequest = new CreateNodeRequest(create.under(), create.inWorkspace(), create.named(),
505 create.conflictBehavior(), newProperties.values());
506 newRequest.setActualLocationOfNode(create.getActualLocationOfNode());
507 return newRequest;
508
509 case REMOVE_PROPERTY:
510 RemovePropertyRequest remove = (RemovePropertyRequest)change;
511 newProperties = new HashMap<Name, Property>();
512 for (Property property : create.properties()) {
513 newProperties.put(property.getName(), property);
514 }
515 newProperties.remove(remove.propertyName());
516 newRequest = new CreateNodeRequest(create.under(), create.inWorkspace(), create.named(),
517 create.conflictBehavior(), newProperties.values());
518 newRequest.setActualLocationOfNode(create.getActualLocationOfNode());
519 return newRequest;
520
521 case UPDATE_VALUES:
522 UpdateValuesRequest updateRequest = (UpdateValuesRequest)change;
523 newProperties = new HashMap<Name, Property>();
524 for (Property property : create.properties()) {
525 newProperties.put(property.getName(), property);
526 }
527 Property updated = newProperties.get(updateRequest.property());
528 if (updated != null) {
529
530 List<Object> newValues = new LinkedList<Object>();
531 for (Object existingValue : updated) {
532 newValues.add(existingValue);
533 }
534 newValues.removeAll(updateRequest.removedValues());
535 newValues.addAll(updateRequest.addedValues());
536 updated = context.getPropertyFactory().create(updateRequest.property(), newValues);
537 } else {
538
539 updated = context.getPropertyFactory().create(updateRequest.property(), updateRequest.addedValues());
540 }
541 newProperties.put(updated.getName(), updated);
542 newRequest = new CreateNodeRequest(create.under(), create.inWorkspace(), create.named(),
543 create.conflictBehavior(), newProperties.values());
544 newRequest.setActualLocationOfNode(create.getActualLocationOfNode());
545 return newRequest;
546 default:
547
548 break;
549 }
550 }
551 if (RequestType.UPDATE_PROPERTIES == original.getType()) {
552 UpdatePropertiesRequest update = (UpdatePropertiesRequest)original;
553 UpdatePropertiesRequest newRequest;
554 Map<Name, Property> newProperties;
555
556 switch (change.getType()) {
557 case SET_PROPERTY:
558 SetPropertyRequest set = (SetPropertyRequest)change;
559 Property newProperty = set.property();
560 Name newPropertyName = newProperty.getName();
561 newProperties = new HashMap<Name, Property>(update.properties());
562 newProperties.put(newPropertyName, newProperty);
563 newRequest = new UpdatePropertiesRequest(update.getActualLocationOfNode(), update.inWorkspace(),
564 newProperties, update.removeOtherProperties());
565 newRequest.setActualLocationOfNode(update.getActualLocationOfNode());
566 return newRequest;
567 case REMOVE_PROPERTY:
568 RemovePropertyRequest remove = (RemovePropertyRequest)change;
569 newProperties = new HashMap<Name, Property>(update.properties());
570 newProperties.remove(remove.propertyName());
571 newRequest = new UpdatePropertiesRequest(update.getActualLocationOfNode(), update.inWorkspace(),
572 newProperties, update.removeOtherProperties());
573 newRequest.setActualLocationOfNode(update.getActualLocationOfNode());
574 return newRequest;
575 case UPDATE_VALUES:
576 UpdateValuesRequest updateValues = (UpdateValuesRequest)change;
577 newProperties = new HashMap<Name, Property>(update.properties());
578 Property updated = newProperties.get(updateValues.property());
579 if (updated != null) {
580
581 List<Object> newValues = new LinkedList<Object>();
582 for (Object existingValue : updated) {
583 newValues.add(existingValue);
584 }
585 newValues.removeAll(updateValues.removedValues());
586 newValues.addAll(updateValues.addedValues());
587 updated = context.getPropertyFactory().create(updateValues.property(), newValues);
588 } else {
589
590 updated = context.getPropertyFactory().create(updateValues.property(), updateValues.addedValues());
591 }
592 newProperties.put(updated.getName(), updated);
593 newRequest = new UpdatePropertiesRequest(update.getActualLocationOfNode(), update.inWorkspace(),
594 newProperties, update.removeOtherProperties());
595 newRequest.setActualLocationOfNode(update.getActualLocationOfNode());
596 return newRequest;
597 default:
598
599 break;
600 }
601 }
602 return null;
603 }
604
605 @Immutable
606 protected static abstract class WorkRequest {
607 public abstract String toString( ExecutionContext context );
608
609
610
611
612
613
614 @Override
615 public String toString() {
616 return toString(new ExecutionContext());
617 }
618 }
619
620 @Immutable
621 protected static class CrawlSubgraph extends WorkRequest {
622 protected final Location location;
623 protected final int depth;
624
625 protected CrawlSubgraph( Location location,
626 int depth ) {
627 this.location = location;
628 this.depth = depth;
629 }
630
631 @Override
632 public String toString( ExecutionContext context ) {
633 return "Crawl " + location.getPath().getString(context.getNamespaceRegistry());
634 }
635 }
636
637 @Immutable
638 protected static class ForwardRequest extends WorkRequest {
639 protected final ChangeRequest changeRequest;
640
641 protected ForwardRequest( ChangeRequest changeRequest ) {
642 this.changeRequest = changeRequest;
643 }
644
645 @Override
646 public String toString( ExecutionContext context ) {
647 return "Forward " + changeRequest;
648 }
649 }
650 }