1 | /* |
2 | |
3 | Derby - Class org.apache.derby.impl.tools.ij.StatementFinder |
4 | |
5 | Copyright 1997, 2004 The Apache Software Foundation or its licensors, as applicable. |
6 | |
7 | Licensed under the Apache License, Version 2.0 (the "License"); |
8 | you may not use this file except in compliance with the License. |
9 | You may obtain a copy of the License at |
10 | |
11 | http://www.apache.org/licenses/LICENSE-2.0 |
12 | |
13 | Unless required by applicable law or agreed to in writing, software |
14 | distributed under the License is distributed on an "AS IS" BASIS, |
15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
16 | See the License for the specific language governing permissions and |
17 | limitations under the License. |
18 | |
19 | */ |
20 | |
21 | package org.apache.derby.impl.tools.ij; |
22 | |
23 | import java.io.IOException; |
24 | import java.io.Reader; |
25 | |
26 | /** |
27 | StatementGrabber looks through an input stream for |
28 | the next JSQL statement. A statement is considered to |
29 | be any tokens up to the next semicolon or EOF. |
30 | <p> |
31 | Semicolons inside comments, strings, and delimited identifiers |
32 | are not considered to be statement terminators but to be |
33 | part of those tokens. |
34 | <p> |
35 | The only comment form currently recognized is the SQL comment, |
36 | which begins with "--" and ends at the next EOL. |
37 | <p> |
38 | Strings and delimited identifiers are permitted to contain |
39 | newlines; the actual IJ or JSQL parsers will report errors when |
40 | those cases occur. |
41 | <p> |
42 | There are no escaped characters, i.e. "\n" is considered to |
43 | be two characters, '\' and 'n'. |
44 | |
45 | @author ames |
46 | */ |
47 | |
48 | public class StatementFinder { |
49 | |
50 | private Reader source; |
51 | private StringBuffer statement = new StringBuffer(); |
52 | private int state; |
53 | private boolean atEOF = false; |
54 | private boolean peekEOF = false; |
55 | private char peekChar; |
56 | private boolean peeked = false; |
57 | |
58 | // state variables |
59 | private static final int IN_STATEMENT = 0; |
60 | private static final int IN_STRING = 1; |
61 | private static final int IN_SQLCOMMENT = 2; |
62 | private static final int END_OF_STATEMENT = 3; |
63 | private static final int END_OF_INPUT = 4; |
64 | |
65 | // special state-changing characters |
66 | private static final char MINUS = '-'; |
67 | private static final char SINGLEQUOTE = '\''; |
68 | private static final char DOUBLEQUOTE = '\"'; |
69 | private static final char SEMICOLON = ';'; |
70 | private static final char NEWLINE = '\n'; |
71 | private static final char RETURN = '\r'; |
72 | private static final char SPACE = ' '; |
73 | private static final char TAB = '\t'; |
74 | private static final char FORMFEED = '\f'; |
75 | |
76 | /** |
77 | The constructor does not assume the stream is data input |
78 | or buffered, so it will wrap it appropriately. |
79 | |
80 | @param s the input stream for reading statements from. |
81 | */ |
82 | public StatementFinder(Reader s) { |
83 | source = s; |
84 | } |
85 | |
86 | /** |
87 | Reinit is used to redirect the finder to another stream. |
88 | The previous stream should not have been in a PEEK state. |
89 | |
90 | @param s the input stream for reading statements from. |
91 | */ |
92 | public void ReInit(Reader s) { |
93 | try { |
94 | source.close(); |
95 | } catch (IOException ioe) { |
96 | // just be quiet if it is already gone |
97 | } |
98 | source = s; |
99 | state = IN_STATEMENT; |
100 | atEOF = false; |
101 | peekEOF = false; |
102 | peeked = false; |
103 | } |
104 | |
105 | public void close() throws IOException { |
106 | source.close(); |
107 | } |
108 | |
109 | /** |
110 | get the next statement in the input stream. Returns it, |
111 | dropping its closing semicolon if it has one. If there is |
112 | no next statement, return a null. |
113 | |
114 | @return the next statement in the input stream. |
115 | */ |
116 | public String nextStatement() { |
117 | boolean haveSemi = false; |
118 | char nextChar; |
119 | |
120 | // initialize fields for getting the next statement |
121 | statement.setLength(0); |
122 | if (state == END_OF_INPUT) return null; |
123 | |
124 | state = IN_STATEMENT; |
125 | |
126 | // skip leading whitespace |
127 | nextChar = peekChar(); |
128 | if (peekEOF()) { |
129 | state = END_OF_INPUT; |
130 | return null; |
131 | } |
132 | if (whiteSpace(nextChar)) { |
133 | while (whiteSpace(peekChar()) && ! peekEOF()); |
134 | if (peekEOF()) { |
135 | state = END_OF_INPUT; |
136 | return null; |
137 | } |
138 | } |
139 | |
140 | while (state != END_OF_STATEMENT && state != END_OF_INPUT) { |
141 | |
142 | // get the next character from the input |
143 | nextChar = readChar(); |
144 | if (atEOF()) { |
145 | state = END_OF_INPUT; |
146 | break; |
147 | } |
148 | |
149 | switch(nextChar) { |
150 | case MINUS: |
151 | readSingleLineComment(nextChar); |
152 | break; |
153 | case SINGLEQUOTE: |
154 | case DOUBLEQUOTE: |
155 | readString(nextChar); |
156 | break; |
157 | case SEMICOLON: |
158 | haveSemi = true; |
159 | state = END_OF_STATEMENT; |
160 | break; |
161 | default: |
162 | // keep going, just a normal character |
163 | break; |
164 | } |
165 | } |
166 | |
167 | if (haveSemi) |
168 | statement.setLength(statement.length()-1); |
169 | return statement.toString(); |
170 | } |
171 | |
172 | /** |
173 | Determine if the given character is considered whitespace |
174 | |
175 | @param c the character to consider |
176 | @return true if the character is whitespace |
177 | */ |
178 | private boolean whiteSpace(char c) { |
179 | return (c == SPACE || |
180 | c == TAB || |
181 | c == RETURN || |
182 | c == NEWLINE || |
183 | c == FORMFEED); |
184 | } |
185 | |
186 | /** |
187 | Advance the source stream to the end of a comment if it |
188 | is on one, assuming the first character of |
189 | a potential single line comment has been found. |
190 | If it is not a comment, do not advance the stream. |
191 | <p> |
192 | The form of a single line comment is, in regexp, XX.*$, |
193 | where XX is two instances of commentChar. |
194 | |
195 | @param commentChar the character whose duplication signifies |
196 | the start of the comment. |
197 | */ |
198 | private void readSingleLineComment(char commentChar) { |
199 | char nextChar; |
200 | |
201 | nextChar = peekChar(); |
202 | // if next char is EOF, we are done. |
203 | if (peekEOF()) return; |
204 | |
205 | // if nextChar is not a minus, it was just a normal minus, |
206 | // nothing special to do |
207 | if (nextChar != commentChar) return; |
208 | |
209 | // we are really in a comment |
210 | readChar(); // grab the minus for real. |
211 | |
212 | state = IN_SQLCOMMENT; |
213 | do { |
214 | nextChar = peekChar(); |
215 | if (peekEOF()) { |
216 | // let the caller process the EOF, don't read it |
217 | state = IN_STATEMENT; |
218 | return; |
219 | } |
220 | switch (nextChar) { |
221 | case NEWLINE: |
222 | case RETURN: |
223 | readChar(); // okay to process the character |
224 | state = IN_STATEMENT; |
225 | return; |
226 | default: |
227 | readChar(); // process the character, still in comment |
228 | break; |
229 | } |
230 | } while (state == IN_SQLCOMMENT); // could be while true... |
231 | } |
232 | |
233 | /** |
234 | Advance the stream to the end of the string. |
235 | Assumes the opening delimiter of the string has been read. |
236 | This handles the SQL ability to put the delimiter within |
237 | the string by doubling it, by reading those as two strings |
238 | sitting next to one another. I.e, 'Mary''s lamb' is read |
239 | by this class as two strings, 'Mary' and 's lamb'. |
240 | <p> |
241 | The delimiter of the string is expected to be repeated at |
242 | its other end. If the other flavor of delimiter occurs within |
243 | the string, it is just a normal character within it. |
244 | <p> |
245 | All characters except the delimiter are permitted within the |
246 | string. If EOF is hit before the closing delimiter is found, |
247 | the end of the string is assumed. Parsers using this parser |
248 | will detect the error in that case and return appropriate messages. |
249 | |
250 | @param stringDelimiter the starting and ending character |
251 | for the string being read. |
252 | */ |
253 | private void readString(char stringDelimiter) { |
254 | state = IN_STRING; |
255 | do { |
256 | char nextChar = readChar(); |
257 | |
258 | if (atEOF()) { |
259 | state = END_OF_INPUT; |
260 | return; |
261 | } |
262 | |
263 | if (nextChar == stringDelimiter) { |
264 | // we've reached the end of the string |
265 | state = IN_STATEMENT; |
266 | return; |
267 | } |
268 | |
269 | // still in string |
270 | } while (state == IN_STRING); // could be while true... |
271 | } |
272 | |
273 | private boolean atEOF() { |
274 | return atEOF; |
275 | } |
276 | |
277 | private boolean peekEOF() { |
278 | return peekEOF; |
279 | } |
280 | |
281 | /** |
282 | return the next character in the source stream and |
283 | append it to the statement buffer. |
284 | |
285 | @return the next character in the source stream. |
286 | */ |
287 | private char readChar() { |
288 | if (!peeked) peekChar(); |
289 | |
290 | peeked = false; |
291 | atEOF = peekEOF; |
292 | |
293 | if (!atEOF) statement.append(peekChar); |
294 | |
295 | return peekChar; |
296 | } |
297 | |
298 | /** |
299 | return the next character in the source stream, without |
300 | advancing. |
301 | |
302 | @return the next character in the source stream. |
303 | */ |
304 | private char peekChar() { |
305 | peeked = true; |
306 | char c = '\00'; |
307 | |
308 | try { |
309 | int cInt; |
310 | |
311 | // REMIND: this is assuming a flat ascii source file. |
312 | // will need to beef it up at some future point to |
313 | // understand whether the stream is ascii or something else. |
314 | cInt = source.read(); |
315 | peekEOF = (cInt == -1); |
316 | if (!peekEOF) c = (char)cInt; |
317 | } catch (IOException ie) { |
318 | throw ijException.iOException(ie); |
319 | } |
320 | |
321 | peekChar = c; |
322 | return c; |
323 | } |
324 | } |