1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 package org.modeshape.graph.query.parse;
25
26 import java.util.ArrayList;
27 import java.util.List;
28 import org.modeshape.common.CommonI18n;
29 import org.modeshape.common.text.ParsingException;
30 import org.modeshape.common.text.Position;
31 import org.modeshape.common.text.TokenStream;
32 import org.modeshape.common.text.TokenStream.CharacterStream;
33 import org.modeshape.common.text.TokenStream.Token;
34 import org.modeshape.common.text.TokenStream.Tokenizer;
35 import org.modeshape.common.text.TokenStream.Tokens;
36 import org.modeshape.common.util.CheckArg;
37 import org.modeshape.graph.query.model.FullTextSearch.Conjunction;
38 import org.modeshape.graph.query.model.FullTextSearch.Disjunction;
39 import org.modeshape.graph.query.model.FullTextSearch.NegationTerm;
40 import org.modeshape.graph.query.model.FullTextSearch.SimpleTerm;
41 import org.modeshape.graph.query.model.FullTextSearch.Term;
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65 public class FullTextSearchParser {
66
67
68
69
70
71
72
73
74
75 public Term parse( String fullTextSearchExpression ) {
76 CheckArg.isNotNull(fullTextSearchExpression, "fullTextSearchExpression");
77 Tokenizer tokenizer = new TermTokenizer();
78 TokenStream stream = new TokenStream(fullTextSearchExpression, tokenizer, false);
79 return parse(stream.start());
80 }
81
82
83
84
85
86
87
88
89
90
91 public Term parse( TokenStream tokens ) {
92 CheckArg.isNotNull(tokens, "tokens");
93 List<Term> terms = new ArrayList<Term>();
94 do {
95 Term term = parseDisjunctedTerms(tokens);
96 if (term == null) break;
97 terms.add(term);
98 } while (tokens.canConsume("OR"));
99 if (terms.isEmpty()) return null;
100 return terms.size() > 1 ? new Disjunction(terms) : terms.iterator().next();
101 }
102
103 protected Term parseDisjunctedTerms( TokenStream tokens ) {
104 List<Term> terms = new ArrayList<Term>();
105 do {
106 Term term = parseTerm(tokens);
107 if (term == null) break;
108 terms.add(term);
109 } while (tokens.hasNext() && !tokens.matches("OR"));
110 if (terms.isEmpty()) return null;
111 return terms.size() > 1 ? new Conjunction(terms) : terms.iterator().next();
112 }
113
114 protected Term parseTerm( TokenStream tokens ) {
115 boolean negated = tokens.canConsume('-');
116 if (!negated) tokens.canConsume('+');
117 Term result = new SimpleTerm(removeQuotes(tokens.consume()));
118 return negated ? new NegationTerm(result) : result;
119 }
120
121
122
123
124
125
126
127 protected String removeQuotes( String text ) {
128 return text.replaceFirst("^['\"]+", "").replaceAll("['\"]+$", "");
129 }
130
131
132
133
134
135
136
137
138
139 public static class TermTokenizer implements Tokenizer {
140
141
142
143
144 public static final int WORD = 1;
145
146
147
148
149 public static final int PLUS_MINUS = 2;
150
151
152
153
154 public static final int SINGLE_QUOTED_STRING = 4;
155
156
157
158
159 public static final int DOUBLE_QUOTED_STRING = 8;
160
161 protected TermTokenizer() {
162 }
163
164
165
166
167
168
169 public void tokenize( CharacterStream input,
170 Tokens tokens ) throws ParsingException {
171 while (input.hasNext()) {
172 char c = input.next();
173 switch (c) {
174 case ' ':
175 case '\t':
176 case '\n':
177 case '\r':
178
179 break;
180 case '-':
181 case '+':
182 tokens.addToken(input.position(input.index()), input.index(), input.index() + 1, PLUS_MINUS);
183 break;
184 case '\"':
185 int startIndex = input.index();
186 Position startingPosition = input.position(startIndex);
187 boolean foundClosingQuote = false;
188 while (input.hasNext()) {
189 c = input.next();
190 if (c == '\\' && input.isNext('"')) {
191 c = input.next();
192 } else if (c == '"') {
193 foundClosingQuote = true;
194 break;
195 }
196 }
197 if (!foundClosingQuote) {
198 String msg = CommonI18n.noMatchingDoubleQuoteFound.text(startingPosition.getLine(),
199 startingPosition.getColumn());
200 throw new ParsingException(startingPosition, msg);
201 }
202 int endIndex = input.index() + 1;
203 tokens.addToken(startingPosition, startIndex, endIndex, DOUBLE_QUOTED_STRING);
204 break;
205 case '\'':
206 startIndex = input.index();
207 startingPosition = input.position(startIndex);
208 foundClosingQuote = false;
209 while (input.hasNext()) {
210 c = input.next();
211 if (c == '\\' && input.isNext('\'')) {
212 c = input.next();
213 } else if (c == '\'') {
214 foundClosingQuote = true;
215 break;
216 }
217 }
218 if (!foundClosingQuote) {
219 String msg = CommonI18n.noMatchingSingleQuoteFound.text(startingPosition.getLine(),
220 startingPosition.getColumn());
221 throw new ParsingException(startingPosition, msg);
222 }
223 endIndex = input.index() + 1;
224 tokens.addToken(startingPosition, startIndex, endIndex, SINGLE_QUOTED_STRING);
225 break;
226 default:
227 startIndex = input.index();
228 startingPosition = input.position(startIndex);
229
230 while (input.hasNext() && !(input.isNextWhitespace())) {
231 c = input.next();
232 }
233 endIndex = input.index() + 1;
234 tokens.addToken(startingPosition, startIndex, endIndex, WORD);
235 }
236 }
237 }
238 }
239
240 }