1 | /* |
2 | * Copyright 1999-2006 Sun Microsystems, Inc. All Rights Reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. Sun designates this |
8 | * particular file as subject to the "Classpath" exception as provided |
9 | * by Sun in the LICENSE file that accompanied this code. |
10 | * |
11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | * version 2 for more details (a copy is included in the LICENSE file that |
15 | * accompanied this code). |
16 | * |
17 | * You should have received a copy of the GNU General Public License version |
18 | * 2 along with this work; if not, write to the Free Software Foundation, |
19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
20 | * |
21 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
22 | * CA 95054 USA or visit www.sun.com if you need additional information or |
23 | * have any questions. |
24 | */ |
25 | |
26 | package com.sun.tools.javac.parser; |
27 | |
28 | import java.io.*; |
29 | import java.nio.*; |
30 | import java.nio.ByteBuffer; |
31 | import java.nio.charset.*; |
32 | import java.nio.channels.*; |
33 | import java.util.regex.*; |
34 | |
35 | import com.sun.tools.javac.util.*; |
36 | |
37 | import com.sun.tools.javac.code.Source; |
38 | |
39 | import static com.sun.tools.javac.parser.Token.*; |
40 | import static com.sun.tools.javac.util.LayoutCharacters.*; |
41 | |
42 | /** The lexical analyzer maps an input stream consisting of |
43 | * ASCII characters and Unicode escapes into a token sequence. |
44 | * |
45 | * <p><b>This is NOT part of any API supported by Sun Microsystems. If |
46 | * you write code that depends on this, you do so at your own risk. |
47 | * This code and its internal interfaces are subject to change or |
48 | * deletion without notice.</b> |
49 | */ |
50 | public class Scanner implements Lexer { |
51 | |
/** When true, nextToken() and the process* hooks print a trace of each
 *  scanned span (positions and raw characters) to System.out.
 */
52 | private static boolean scannerDebug = false; |
53 | |
54 | /** A factory for creating scanners. */ |
55 | public static class Factory { |
56 | /** The context key for the scanner factory. */ |
57 | public static final Context.Key<Scanner.Factory> scannerFactoryKey = |
58 | new Context.Key<Scanner.Factory>(); |
59 | |
60 | /** Get the Factory instance for this context. */ |
61 | public static Factory instance(Context context) { |
62 | Factory instance = context.get(scannerFactoryKey); |
63 | if (instance == null) |
64 | instance = new Factory(context); |
65 | return instance; |
66 | } |
67 | |
68 | final Log log; |
69 | final Name.Table names; |
70 | final Source source; |
71 | final Keywords keywords; |
72 | |
73 | /** Create a new scanner factory. */ |
74 | protected Factory(Context context) { |
75 | context.put(scannerFactoryKey, this); |
76 | this.log = Log.instance(context); |
77 | this.names = Name.Table.instance(context); |
78 | this.source = Source.instance(context); |
79 | this.keywords = Keywords.instance(context); |
80 | } |
81 | |
82 | public Scanner newScanner(CharSequence input) { |
83 | if (input instanceof CharBuffer) { |
84 | return new Scanner(this, (CharBuffer)input); |
85 | } else { |
86 | char[] array = input.toString().toCharArray(); |
87 | return newScanner(array, array.length); |
88 | } |
89 | } |
90 | |
91 | public Scanner newScanner(char[] input, int inputLength) { |
92 | return new Scanner(this, input, inputLength); |
93 | } |
94 | } |
95 | |
96 | /* Output variables; set by nextToken(): |
97 | */ |
98 | |
99 | /** The token, set by nextToken(). |
100 | */ |
101 | private Token token; |
102 | |
103 | /** Allow hex floating-point literals. |
 * Initialised from the factory's source level; scanHexExponentAndSuffix()
 * sets it to true right after reporting "unsupported.fp.lit".
104 | */ |
105 | private boolean allowHexFloats; |
106 | |
107 | /** The token's position, 0-based offset from beginning of text. |
108 | */ |
109 | private int pos; |
110 | |
111 | /** Character position just after the last character of the token. |
112 | */ |
113 | private int endPos; |
114 | |
115 | /** The end position (endPos) of the previous token; nextToken() copies |
116 | * endPos into this field before it starts scanning. */ |
117 | private int prevEndPos; |
118 | |
119 | /** The position where a lexical error occurred; |
 * Position.NOPOS until lexError() or errPos(int) stores a position.
120 | */ |
121 | private int errPos = Position.NOPOS; |
122 | |
123 | /** The name of an identifier or token: |
124 | */ |
125 | private Name name; |
126 | |
127 | /** The radix of a numeric literal token. |
128 | */ |
129 | private int radix; |
130 | |
131 | /** Has a @deprecated been encountered in last doc comment? |
132 | * This needs to be reset by the client (see resetDeprecatedFlag()). |
133 | */ |
134 | protected boolean deprecatedFlag = false; |
135 | |
136 | /** A character buffer for literals; sp is the number of characters |
 * currently stored in it. putChar() doubles the buffer when it is full.
137 | */ |
138 | private char[] sbuf = new char[128]; |
139 | private int sp; |
140 | |
141 | /** The input buffer (buf), the buffer index most recently read into ch (bp), |
142 | * and the index of one past the last input character in the buffer (buflen). |
 * buf[buflen] holds the EOI sentinel written by the constructor.
 * eofPos is the input length originally passed to the constructor; it is
 * the position assigned to the EOF token.
143 | */ |
144 | private char[] buf; |
145 | private int bp; |
146 | private int buflen; |
147 | private int eofPos; |
148 | |
149 | /** The current character. |
150 | */ |
151 | private char ch; |
152 | |
153 | /** The buffer index of the last converted unicode character, |
 * or -1 if no unicode escape has been converted yet.
154 | */ |
155 | private int unicodeConversionBp = -1; |
156 | |
157 | /** The log to be used for error reporting. |
158 | */ |
159 | private final Log log; |
160 | |
161 | /** The name table. */ |
162 | private final Name.Table names; |
163 | |
164 | /** The keyword table. */ |
165 | private final Keywords keywords; |
166 | |
/** Shared constructor prologue: takes the log, name table and keyword
 *  table from the factory and reads the hex-float setting from the
 *  factory's source level.
 */
private Scanner(Factory fac) {
    log = fac.log;
    names = fac.names;
    keywords = fac.keywords;
    allowHexFloats = fac.source.allowHexFloats();
}
174 | |
175 | private static final boolean hexFloatsWork = hexFloatsWork(); |
176 | private static boolean hexFloatsWork() { |
177 | try { |
178 | Float.valueOf("0x1.0p1"); |
179 | return true; |
180 | } catch (NumberFormatException ex) { |
181 | return false; |
182 | } |
183 | } |
184 | |
185 | /** Create a scanner from the input buffer. buffer must implement |
186 | * array() and compact(), and remaining() must be less than limit(). |
 * Delegates to the char[] constructor, passing
 * JavacFileManager.toArray(buffer) as the input and buffer.limit()
 * as the input length.
 *
 * @param fac    the factory which created this Scanner
 * @param buffer the characters to scan
187 | */ |
188 | protected Scanner(Factory fac, CharBuffer buffer) { |
189 | this(fac, JavacFileManager.toArray(buffer), buffer.limit()); |
190 | } |
191 | |
/**
 * Create a scanner over a character array. One slot past the scanned
 * input is needed for the EOI sentinel. If the array is completely
 * filled by the input, either a trailing whitespace character is
 * sacrificed for the sentinel (the array is modified) or the input is
 * copied into an array one element larger. To avoid the copy, ensure
 * that {@code inputLength < input.length} or that
 * {@code input[input.length - 1]} is a white space character.
 *
 * @param fac the factory which created this Scanner
 * @param input the input, might be modified
 * @param inputLength the size of the input.
 * Must be positive and less than or equal to input.length.
 */
protected Scanner(Factory fac, char[] input, int inputLength) {
    this(fac);
    eofPos = inputLength;
    if (inputLength == input.length) {
        boolean endsInWhitespace =
            input.length > 0 && Character.isWhitespace(input[input.length - 1]);
        if (endsInWhitespace) {
            // Reuse the trailing whitespace slot for the sentinel.
            inputLength--;
        } else {
            char[] grown = new char[inputLength + 1];
            System.arraycopy(input, 0, grown, 0, input.length);
            input = grown;
        }
    }
    buflen = inputLength;
    buf = input;
    buf[buflen] = EOI;
    bp = -1;
    // Prime ch with the first character.
    scanChar();
}
221 | |
222 | /** Report an error at the given position using the provided arguments. |
223 | */ |
224 | private void lexError(int pos, String key, Object... args) { |
225 | log.error(pos, key, args); |
226 | token = ERROR; |
227 | errPos = pos; |
228 | } |
229 | |
230 | /** Report an error at the current token position using the provided |
231 | * arguments. |
232 | */ |
233 | private void lexError(String key, Object... args) { |
234 | lexError(pos, key, args); |
235 | } |
236 | |
237 | /** Convert an ASCII digit from its base (8, 10, or 16) |
238 | * to its value. |
239 | */ |
240 | private int digit(int base) { |
241 | char c = ch; |
242 | int result = Character.digit(c, base); |
243 | if (result >= 0 && c > 0x7f) { |
244 | lexError(pos+1, "illegal.nonascii.digit"); |
245 | ch = "0123456789abcdef".charAt(result); |
246 | } |
247 | return result; |
248 | } |
249 | |
250 | /** Convert unicode escape; bp points to initial '\' character |
251 | * (Spec 3.3). |
252 | */ |
253 | private void convertUnicode() { |
254 | if (ch == '\\' && unicodeConversionBp != bp) { |
255 | bp++; ch = buf[bp]; |
256 | if (ch == 'u') { |
257 | do { |
258 | bp++; ch = buf[bp]; |
259 | } while (ch == 'u'); |
260 | int limit = bp + 3; |
261 | if (limit < buflen) { |
262 | int d = digit(16); |
263 | int code = d; |
264 | while (bp < limit && d >= 0) { |
265 | bp++; ch = buf[bp]; |
266 | d = digit(16); |
267 | code = (code << 4) + d; |
268 | } |
269 | if (d >= 0) { |
270 | ch = (char)code; |
271 | unicodeConversionBp = bp; |
272 | return; |
273 | } |
274 | } |
275 | lexError(bp, "illegal.unicode.esc"); |
276 | } else { |
277 | bp--; |
278 | ch = '\\'; |
279 | } |
280 | } |
281 | } |
282 | |
283 | /** Read next character. |
284 | */ |
285 | private void scanChar() { |
286 | ch = buf[++bp]; |
287 | if (ch == '\\') { |
288 | convertUnicode(); |
289 | } |
290 | } |
291 | |
292 | /** Read next character in comment, skipping over double '\' characters. |
293 | */ |
294 | private void scanCommentChar() { |
295 | scanChar(); |
296 | if (ch == '\\') { |
297 | if (buf[bp+1] == '\\' && unicodeConversionBp != bp) { |
298 | bp++; |
299 | } else { |
300 | convertUnicode(); |
301 | } |
302 | } |
303 | } |
304 | |
305 | /** Append a character to sbuf. |
306 | */ |
307 | private void putChar(char ch) { |
308 | if (sp == sbuf.length) { |
309 | char[] newsbuf = new char[sbuf.length * 2]; |
310 | System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length); |
311 | sbuf = newsbuf; |
312 | } |
313 | sbuf[sp++] = ch; |
314 | } |
315 | |
316 | /** For debugging purposes: print character. |
317 | */ |
318 | private void dch() { |
319 | System.err.print(ch); System.out.flush(); |
320 | } |
321 | |
322 | /** Read next character in character or string literal and copy into sbuf. |
323 | */ |
324 | private void scanLitChar() { |
325 | if (ch == '\\') { |
326 | if (buf[bp+1] == '\\' && unicodeConversionBp != bp) { |
327 | bp++; |
328 | putChar('\\'); |
329 | scanChar(); |
330 | } else { |
331 | scanChar(); |
332 | switch (ch) { |
333 | case '0': case '1': case '2': case '3': |
334 | case '4': case '5': case '6': case '7': |
335 | char leadch = ch; |
336 | int oct = digit(8); |
337 | scanChar(); |
338 | if ('0' <= ch && ch <= '7') { |
339 | oct = oct * 8 + digit(8); |
340 | scanChar(); |
341 | if (leadch <= '3' && '0' <= ch && ch <= '7') { |
342 | oct = oct * 8 + digit(8); |
343 | scanChar(); |
344 | } |
345 | } |
346 | putChar((char)oct); |
347 | break; |
348 | case 'b': |
349 | putChar('\b'); scanChar(); break; |
350 | case 't': |
351 | putChar('\t'); scanChar(); break; |
352 | case 'n': |
353 | putChar('\n'); scanChar(); break; |
354 | case 'f': |
355 | putChar('\f'); scanChar(); break; |
356 | case 'r': |
357 | putChar('\r'); scanChar(); break; |
358 | case '\'': |
359 | putChar('\''); scanChar(); break; |
360 | case '\"': |
361 | putChar('\"'); scanChar(); break; |
362 | case '\\': |
363 | putChar('\\'); scanChar(); break; |
364 | default: |
365 | lexError(bp, "illegal.esc.char"); |
366 | } |
367 | } |
368 | } else if (bp != buflen) { |
369 | putChar(ch); scanChar(); |
370 | } |
371 | } |
372 | |
373 | /** Read fractional part of hexadecimal floating point number. |
374 | */ |
375 | private void scanHexExponentAndSuffix() { |
376 | if (ch == 'p' || ch == 'P') { |
377 | putChar(ch); |
378 | scanChar(); |
379 | if (ch == '+' || ch == '-') { |
380 | putChar(ch); |
381 | scanChar(); |
382 | } |
383 | if ('0' <= ch && ch <= '9') { |
384 | do { |
385 | putChar(ch); |
386 | scanChar(); |
387 | } while ('0' <= ch && ch <= '9'); |
388 | if (!allowHexFloats) { |
389 | lexError("unsupported.fp.lit"); |
390 | allowHexFloats = true; |
391 | } |
392 | else if (!hexFloatsWork) |
393 | lexError("unsupported.cross.fp.lit"); |
394 | } else |
395 | lexError("malformed.fp.lit"); |
396 | } else { |
397 | lexError("malformed.fp.lit"); |
398 | } |
399 | if (ch == 'f' || ch == 'F') { |
400 | putChar(ch); |
401 | scanChar(); |
402 | token = FLOATLITERAL; |
403 | } else { |
404 | if (ch == 'd' || ch == 'D') { |
405 | putChar(ch); |
406 | scanChar(); |
407 | } |
408 | token = DOUBLELITERAL; |
409 | } |
410 | } |
411 | |
412 | /** Read fractional part of floating point number. |
413 | */ |
414 | private void scanFraction() { |
415 | while (digit(10) >= 0) { |
416 | putChar(ch); |
417 | scanChar(); |
418 | } |
419 | int sp1 = sp; |
420 | if (ch == 'e' || ch == 'E') { |
421 | putChar(ch); |
422 | scanChar(); |
423 | if (ch == '+' || ch == '-') { |
424 | putChar(ch); |
425 | scanChar(); |
426 | } |
427 | if ('0' <= ch && ch <= '9') { |
428 | do { |
429 | putChar(ch); |
430 | scanChar(); |
431 | } while ('0' <= ch && ch <= '9'); |
432 | return; |
433 | } |
434 | lexError("malformed.fp.lit"); |
435 | sp = sp1; |
436 | } |
437 | } |
438 | |
439 | /** Read fractional part and 'd' or 'f' suffix of floating point number. |
440 | */ |
441 | private void scanFractionAndSuffix() { |
442 | this.radix = 10; |
443 | scanFraction(); |
444 | if (ch == 'f' || ch == 'F') { |
445 | putChar(ch); |
446 | scanChar(); |
447 | token = FLOATLITERAL; |
448 | } else { |
449 | if (ch == 'd' || ch == 'D') { |
450 | putChar(ch); |
451 | scanChar(); |
452 | } |
453 | token = DOUBLELITERAL; |
454 | } |
455 | } |
456 | |
457 | /** Read fractional part and 'd' or 'f' suffix of floating point number. |
458 | */ |
459 | private void scanHexFractionAndSuffix(boolean seendigit) { |
460 | this.radix = 16; |
461 | assert ch == '.'; |
462 | putChar(ch); |
463 | scanChar(); |
464 | while (digit(16) >= 0) { |
465 | seendigit = true; |
466 | putChar(ch); |
467 | scanChar(); |
468 | } |
469 | if (!seendigit) |
470 | lexError("invalid.hex.number"); |
471 | else |
472 | scanHexExponentAndSuffix(); |
473 | } |
474 | |
475 | /** Read a number. |
476 | * @param radix The radix of the number; one of 8, 10, 16. |
477 | */ |
478 | private void scanNumber(int radix) { |
479 | this.radix = radix; |
480 | // for octal, allow base-10 digit in case it's a float literal |
481 | int digitRadix = (radix <= 10) ? 10 : 16; |
482 | boolean seendigit = false; |
483 | while (digit(digitRadix) >= 0) { |
484 | seendigit = true; |
485 | putChar(ch); |
486 | scanChar(); |
487 | } |
488 | if (radix == 16 && ch == '.') { |
489 | scanHexFractionAndSuffix(seendigit); |
490 | } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) { |
491 | scanHexExponentAndSuffix(); |
492 | } else if (radix <= 10 && ch == '.') { |
493 | putChar(ch); |
494 | scanChar(); |
495 | scanFractionAndSuffix(); |
496 | } else if (radix <= 10 && |
497 | (ch == 'e' || ch == 'E' || |
498 | ch == 'f' || ch == 'F' || |
499 | ch == 'd' || ch == 'D')) { |
500 | scanFractionAndSuffix(); |
501 | } else { |
502 | if (ch == 'l' || ch == 'L') { |
503 | scanChar(); |
504 | token = LONGLITERAL; |
505 | } else { |
506 | token = INTLITERAL; |
507 | } |
508 | } |
509 | } |
510 | |
511 | /** Read an identifier. |
 * On entry ch holds the first character to store (when nextToken() has
 * found a surrogate pair, the high surrogate is already in sbuf and ch
 * is the low surrogate). Characters are appended to sbuf until one is
 * found that cannot be part of an identifier; the collected text is then
 * turned into a Name and token is set to keywords.key(name).
512 | */ |
513 | private void scanIdent() { |
514 | boolean isJavaIdentifierPart; |
515 | char high; |
516 | do { |
517 | if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch; |
518 | // optimization, was: putChar(ch); |
519 | |
520 | scanChar(); |
521 | switch (ch) { |
522 | case 'A': case 'B': case 'C': case 'D': case 'E': |
523 | case 'F': case 'G': case 'H': case 'I': case 'J': |
524 | case 'K': case 'L': case 'M': case 'N': case 'O': |
525 | case 'P': case 'Q': case 'R': case 'S': case 'T': |
526 | case 'U': case 'V': case 'W': case 'X': case 'Y': |
527 | case 'Z': |
528 | case 'a': case 'b': case 'c': case 'd': case 'e': |
529 | case 'f': case 'g': case 'h': case 'i': case 'j': |
530 | case 'k': case 'l': case 'm': case 'n': case 'o': |
531 | case 'p': case 'q': case 'r': case 's': case 't': |
532 | case 'u': case 'v': case 'w': case 'x': case 'y': |
533 | case 'z': |
534 | case '$': case '_': |
535 | case '0': case '1': case '2': case '3': case '4': |
536 | case '5': case '6': case '7': case '8': case '9': |
// The control characters listed below are accepted as identifier parts too.
537 | case '\u0000': case '\u0001': case '\u0002': case '\u0003': |
538 | case '\u0004': case '\u0005': case '\u0006': case '\u0007': |
539 | case '\u0008': case '\u000E': case '\u000F': case '\u0010': |
540 | case '\u0011': case '\u0012': case '\u0013': case '\u0014': |
541 | case '\u0015': case '\u0016': case '\u0017': |
542 | case '\u0018': case '\u0019': case '\u001B': |
543 | case '\u007F': |
544 | break; |
545 | case '\u001A': // EOI is also a legal identifier part |
// ...unless it sits at or beyond buflen, where it ends the identifier.
546 | if (bp >= buflen) { |
547 | name = names.fromChars(sbuf, 0, sp); |
548 | token = keywords.key(name); |
549 | return; |
550 | } |
551 | break; |
552 | default: |
553 | if (ch < '\u0080') { |
554 | // all ASCII range chars already handled, above |
555 | isJavaIdentifierPart = false; |
556 | } else { |
// Non-ASCII: a surrogate pair is tested as one code point, and its high
// surrogate is stored here (the low surrogate is stored by the next iteration).
557 | high = scanSurrogates(); |
558 | if (high != 0) { |
559 | if (sp == sbuf.length) { |
560 | putChar(high); |
561 | } else { |
562 | sbuf[sp++] = high; |
563 | } |
564 | isJavaIdentifierPart = Character.isJavaIdentifierPart( |
565 | Character.toCodePoint(high, ch)); |
566 | } else { |
567 | isJavaIdentifierPart = Character.isJavaIdentifierPart(ch); |
568 | } |
569 | } |
570 | if (!isJavaIdentifierPart) { |
571 | name = names.fromChars(sbuf, 0, sp); |
572 | token = keywords.key(name); |
573 | return; |
574 | } |
575 | } |
576 | } while (true); |
577 | } |
578 | |
579 | /** Are surrogates supported? |
580 | */ |
581 | final static boolean surrogatesSupported = surrogatesSupported(); |
582 | private static boolean surrogatesSupported() { |
583 | try { |
584 | Character.isHighSurrogate('a'); |
585 | return true; |
586 | } catch (NoSuchMethodError ex) { |
587 | return false; |
588 | } |
589 | } |
590 | |
591 | /** Scan surrogate pairs. If 'ch' is a high surrogate and |
592 | * the next character is a low surrogate, then put the low |
593 | * surrogate in 'ch', and return the high surrogate. |
594 | * otherwise, just return 0. |
595 | */ |
596 | private char scanSurrogates() { |
597 | if (surrogatesSupported && Character.isHighSurrogate(ch)) { |
598 | char high = ch; |
599 | |
600 | scanChar(); |
601 | |
602 | if (Character.isLowSurrogate(ch)) { |
603 | return high; |
604 | } |
605 | |
606 | ch = high; |
607 | } |
608 | |
609 | return 0; |
610 | } |
611 | |
612 | /** Return true if ch can be part of an operator. |
613 | */ |
614 | private boolean isSpecial(char ch) { |
615 | switch (ch) { |
616 | case '!': case '%': case '&': case '*': case '?': |
617 | case '+': case '-': case ':': case '<': case '=': |
618 | case '>': case '^': case '|': case '~': |
619 | case '@': |
620 | return true; |
621 | default: |
622 | return false; |
623 | } |
624 | } |
625 | |
626 | /** Read longest possible sequence of special characters and convert |
627 | * to token. |
628 | */ |
629 | private void scanOperator() { |
630 | while (true) { |
631 | putChar(ch); |
632 | Name newname = names.fromChars(sbuf, 0, sp); |
633 | if (keywords.key(newname) == IDENTIFIER) { |
634 | sp--; |
635 | break; |
636 | } |
637 | name = newname; |
638 | token = keywords.key(newname); |
639 | scanChar(); |
640 | if (!isSpecial(ch)) break; |
641 | } |
642 | } |
643 | |
644 | /** |
645 | * Scan a documentation comment; determine if a deprecated tag is present. |
646 | * Called once the initial /, * have been skipped, positioned at the second * |
647 | * (which is treated as the beginning of the first line). |
648 | * Stops positioned at the closing '/'. |
 * If the end of the input is reached first, the method returns without
 * ch being '/'. deprecatedFlag is set when a line starts (after optional
 * white space and stars) with "@deprecated" followed by white space or
 * by the end of the comment.
649 | */ |
650 | @SuppressWarnings("fallthrough") |
651 | private void scanDocComment() { |
652 | boolean deprecatedPrefix = false; |
653 | |
654 | forEachLine: |
655 | while (bp < buflen) { |
656 | |
657 | // Skip optional WhiteSpace at beginning of line |
658 | while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) { |
659 | scanCommentChar(); |
660 | } |
661 | |
662 | // Skip optional consecutive Stars |
663 | while (bp < buflen && ch == '*') { |
664 | scanCommentChar(); |
665 | if (ch == '/') { |
666 | return; |
667 | } |
668 | } |
669 | |
670 | // Skip optional WhiteSpace after Stars |
671 | while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) { |
672 | scanCommentChar(); |
673 | } |
674 | |
675 | deprecatedPrefix = false; |
676 | // At beginning of line in the JavaDoc sense. |
// The tag is only searched for while deprecatedFlag is still false.
677 | if (bp < buflen && ch == '@' && !deprecatedFlag) { |
678 | scanCommentChar(); |
679 | if (bp < buflen && ch == 'd') { |
680 | scanCommentChar(); |
681 | if (bp < buflen && ch == 'e') { |
682 | scanCommentChar(); |
683 | if (bp < buflen && ch == 'p') { |
684 | scanCommentChar(); |
685 | if (bp < buflen && ch == 'r') { |
686 | scanCommentChar(); |
687 | if (bp < buflen && ch == 'e') { |
688 | scanCommentChar(); |
689 | if (bp < buflen && ch == 'c') { |
690 | scanCommentChar(); |
691 | if (bp < buflen && ch == 'a') { |
692 | scanCommentChar(); |
693 | if (bp < buflen && ch == 't') { |
694 | scanCommentChar(); |
695 | if (bp < buflen && ch == 'e') { |
696 | scanCommentChar(); |
697 | if (bp < buflen && ch == 'd') { |
698 | deprecatedPrefix = true; |
699 | scanCommentChar(); |
700 | }}}}}}}}}}} |
// "@deprecated" only counts when followed by white space or by the
// closing star-slash of the comment.
701 | if (deprecatedPrefix && bp < buflen) { |
702 | if (Character.isWhitespace(ch)) { |
703 | deprecatedFlag = true; |
704 | } else if (ch == '*') { |
705 | scanCommentChar(); |
706 | if (ch == '/') { |
707 | deprecatedFlag = true; |
708 | return; |
709 | } |
710 | } |
711 | } |
712 | |
713 | // Skip rest of line |
714 | while (bp < buflen) { |
715 | switch (ch) { |
716 | case '*': |
717 | scanCommentChar(); |
718 | if (ch == '/') { |
719 | return; |
720 | } |
721 | break; |
722 | case CR: // (Spec 3.4) |
723 | scanCommentChar(); |
724 | if (ch != LF) { |
725 | continue forEachLine; |
726 | } |
727 | /* fall through to LF case */ |
728 | case LF: // (Spec 3.4) |
729 | scanCommentChar(); |
730 | continue forEachLine; |
731 | default: |
732 | scanCommentChar(); |
733 | } |
734 | } // rest of line |
735 | } // forEachLine |
736 | return; |
737 | } |
738 | |
739 | /** The value of a literal token, recorded as a string. |
740 | * For integers, leading 0x and 'l' suffixes are suppressed. |
741 | */ |
742 | public String stringVal() { |
743 | return new String(sbuf, 0, sp); |
744 | } |
745 | |
746 | /** Read token. |
 * Saves the previous endPos in prevEndPos and empties sbuf, then skips
 * white space, line terminators and comments (each is reported through
 * processWhiteSpace(), processLineTerminator() or processComment())
 * until one token has been scanned. Sets token and pos and, depending
 * on the token, name, radix and the literal text in sbuf. endPos is set
 * to bp on every exit path by the finally clause.
747 | */ |
748 | public void nextToken() { |
749 | |
750 | try { |
751 | prevEndPos = endPos; |
752 | sp = 0; |
753 | |
754 | while (true) { |
755 | pos = bp; |
756 | switch (ch) { |
757 | case ' ': // (Spec 3.6) |
758 | case '\t': // (Spec 3.6) |
759 | case FF: // (Spec 3.6) |
760 | do { |
761 | scanChar(); |
762 | } while (ch == ' ' || ch == '\t' || ch == FF); |
763 | endPos = bp; |
764 | processWhiteSpace(); |
765 | break; |
766 | case LF: // (Spec 3.4) |
767 | scanChar(); |
768 | endPos = bp; |
769 | processLineTerminator(); |
770 | break; |
771 | case CR: // (Spec 3.4) |
772 | scanChar(); |
773 | if (ch == LF) { |
774 | scanChar(); |
775 | } |
776 | endPos = bp; |
777 | processLineTerminator(); |
778 | break; |
779 | case 'A': case 'B': case 'C': case 'D': case 'E': |
780 | case 'F': case 'G': case 'H': case 'I': case 'J': |
781 | case 'K': case 'L': case 'M': case 'N': case 'O': |
782 | case 'P': case 'Q': case 'R': case 'S': case 'T': |
783 | case 'U': case 'V': case 'W': case 'X': case 'Y': |
784 | case 'Z': |
785 | case 'a': case 'b': case 'c': case 'd': case 'e': |
786 | case 'f': case 'g': case 'h': case 'i': case 'j': |
787 | case 'k': case 'l': case 'm': case 'n': case 'o': |
788 | case 'p': case 'q': case 'r': case 's': case 't': |
789 | case 'u': case 'v': case 'w': case 'x': case 'y': |
790 | case 'z': |
791 | case '$': case '_': |
792 | scanIdent(); |
793 | return; |
794 | case '0': |
// A leading zero starts a hex literal when followed by 'x' or 'X';
// otherwise the number is scanned with radix 8.
795 | scanChar(); |
796 | if (ch == 'x' || ch == 'X') { |
797 | scanChar(); |
798 | if (ch == '.') { |
799 | scanHexFractionAndSuffix(false); |
800 | } else if (digit(16) < 0) { |
801 | lexError("invalid.hex.number"); |
802 | } else { |
803 | scanNumber(16); |
804 | } |
805 | } else { |
806 | putChar('0'); |
807 | scanNumber(8); |
808 | } |
809 | return; |
810 | case '1': case '2': case '3': case '4': |
811 | case '5': case '6': case '7': case '8': case '9': |
812 | scanNumber(10); |
813 | return; |
814 | case '.': |
// A dot starts a floating point literal, an ellipsis, or is the DOT token.
815 | scanChar(); |
816 | if ('0' <= ch && ch <= '9') { |
817 | putChar('.'); |
818 | scanFractionAndSuffix(); |
819 | } else if (ch == '.') { |
820 | putChar('.'); putChar('.'); |
821 | scanChar(); |
822 | if (ch == '.') { |
823 | scanChar(); |
824 | putChar('.'); |
825 | token = ELLIPSIS; |
826 | } else { |
827 | lexError("malformed.fp.lit"); |
828 | } |
829 | } else { |
830 | token = DOT; |
831 | } |
832 | return; |
833 | case ',': |
834 | scanChar(); token = COMMA; return; |
835 | case ';': |
836 | scanChar(); token = SEMI; return; |
837 | case '(': |
838 | scanChar(); token = LPAREN; return; |
839 | case ')': |
840 | scanChar(); token = RPAREN; return; |
841 | case '[': |
842 | scanChar(); token = LBRACKET; return; |
843 | case ']': |
844 | scanChar(); token = RBRACKET; return; |
845 | case '{': |
846 | scanChar(); token = LBRACE; return; |
847 | case '}': |
848 | scanChar(); token = RBRACE; return; |
849 | case '/': |
// A slash starts a line comment, a block or doc comment, the "/=" operator,
// or is the SLASH token.
850 | scanChar(); |
851 | if (ch == '/') { |
852 | do { |
853 | scanCommentChar(); |
854 | } while (ch != CR && ch != LF && bp < buflen); |
// A line comment that runs into the end of the input is not reported.
855 | if (bp < buflen) { |
856 | endPos = bp; |
857 | processComment(CommentStyle.LINE); |
858 | } |
859 | break; |
860 | } else if (ch == '*') { |
861 | scanChar(); |
862 | CommentStyle style; |
863 | if (ch == '*') { |
864 | style = CommentStyle.JAVADOC; |
865 | scanDocComment(); |
866 | } else { |
867 | style = CommentStyle.BLOCK; |
868 | while (bp < buflen) { |
869 | if (ch == '*') { |
870 | scanChar(); |
871 | if (ch == '/') break; |
872 | } else { |
873 | scanCommentChar(); |
874 | } |
875 | } |
876 | } |
// Both comment scanners stop on the closing '/' when the comment is terminated.
877 | if (ch == '/') { |
878 | scanChar(); |
879 | endPos = bp; |
880 | processComment(style); |
881 | break; |
882 | } else { |
883 | lexError("unclosed.comment"); |
884 | return; |
885 | } |
886 | } else if (ch == '=') { |
887 | name = names.slashequals; |
888 | token = SLASHEQ; |
889 | scanChar(); |
890 | } else { |
891 | name = names.slash; |
892 | token = SLASH; |
893 | } |
894 | return; |
895 | case '\'': |
896 | scanChar(); |
897 | if (ch == '\'') { |
898 | lexError("empty.char.lit"); |
899 | } else { |
900 | if (ch == CR || ch == LF) |
901 | lexError(pos, "illegal.line.end.in.char.lit"); |
902 | scanLitChar(); |
903 | if (ch == '\'') { |
904 | scanChar(); |
905 | token = CHARLITERAL; |
906 | } else { |
907 | lexError(pos, "unclosed.char.lit"); |
908 | } |
909 | } |
910 | return; |
911 | case '\"': |
912 | scanChar(); |
913 | while (ch != '\"' && ch != CR && ch != LF && bp < buflen) |
914 | scanLitChar(); |
915 | if (ch == '\"') { |
916 | token = STRINGLITERAL; |
917 | scanChar(); |
918 | } else { |
919 | lexError(pos, "unclosed.str.lit"); |
920 | } |
921 | return; |
922 | default: |
// Remaining cases: operators, identifiers that start with a non-ASCII
// character, end of input, and illegal characters.
923 | if (isSpecial(ch)) { |
924 | scanOperator(); |
925 | } else { |
926 | boolean isJavaIdentifierStart; |
927 | if (ch < '\u0080') { |
928 | // all ASCII range chars already handled, above |
929 | isJavaIdentifierStart = false; |
930 | } else { |
931 | char high = scanSurrogates(); |
932 | if (high != 0) { |
933 | if (sp == sbuf.length) { |
934 | putChar(high); |
935 | } else { |
936 | sbuf[sp++] = high; |
937 | } |
938 | |
939 | isJavaIdentifierStart = Character.isJavaIdentifierStart( |
940 | Character.toCodePoint(high, ch)); |
941 | } else { |
942 | isJavaIdentifierStart = Character.isJavaIdentifierStart(ch); |
943 | } |
944 | } |
945 | if (isJavaIdentifierStart) { |
946 | scanIdent(); |
947 | } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5 |
// End of input: the EOF token is positioned at eofPos.
948 | token = EOF; |
949 | pos = bp = eofPos; |
950 | } else { |
951 | lexError("illegal.char", String.valueOf((int)ch)); |
952 | scanChar(); |
953 | } |
954 | } |
955 | return; |
956 | } |
957 | } |
958 | } finally { |
959 | endPos = bp; |
960 | if (scannerDebug) |
961 | System.out.println("nextToken(" + pos |
962 | + "," + endPos + ")=|" + |
963 | new String(getRawCharacters(pos, endPos)) |
964 | + "|"); |
965 | } |
966 | } |
967 | |
968 | /** Return the current token, set by nextToken(). |
969 | */ |
970 | public Token token() { |
971 | return token; |
972 | } |
973 | |
974 | /** Sets the current token. |
975 | */ |
976 | public void token(Token token) { |
977 | this.token = token; |
978 | } |
979 | |
980 | /** Return the current token's position: a 0-based |
981 | * offset from beginning of the raw input stream |
982 | * (before unicode translation) |
983 | */ |
984 | public int pos() { |
985 | return pos; |
986 | } |
987 | |
988 | /** Return the last character position of the current token. |
989 | */ |
990 | public int endPos() { |
991 | return endPos; |
992 | } |
993 | |
994 | /** Return the last character position of the previous token. |
995 | */ |
996 | public int prevEndPos() { |
997 | return prevEndPos; |
998 | } |
999 | |
1000 | /** Return the position where a lexical error occurred; |
1001 | */ |
1002 | public int errPos() { |
1003 | return errPos; |
1004 | } |
1005 | |
1006 | /** Set the position where a lexical error occurred; |
1007 | */ |
1008 | public void errPos(int pos) { |
1009 | errPos = pos; |
1010 | } |
1011 | |
1012 | /** Return the name of an identifier or token for the current token. |
1013 | */ |
1014 | public Name name() { |
1015 | return name; |
1016 | } |
1017 | |
1018 | /** Return the radix of a numeric literal token. |
1019 | */ |
1020 | public int radix() { |
1021 | return radix; |
1022 | } |
1023 | |
1024 | /** Has a @deprecated been encountered in last doc comment? |
1025 | * This needs to be reset by client with resetDeprecatedFlag. |
1026 | */ |
1027 | public boolean deprecatedFlag() { |
1028 | return deprecatedFlag; |
1029 | } |
1030 | |
1031 | public void resetDeprecatedFlag() { |
1032 | deprecatedFlag = false; |
1033 | } |
1034 | |
1035 | /** |
1036 | * Returns the documentation string of the current token. |
1037 | */ |
1038 | public String docComment() { |
1039 | return null; |
1040 | } |
1041 | |
1042 | /** |
1043 | * Returns a copy of the input buffer, up to its inputLength. |
1044 | * Unicode escape sequences are not translated. |
1045 | */ |
1046 | public char[] getRawCharacters() { |
1047 | char[] chars = new char[buflen]; |
1048 | System.arraycopy(buf, 0, chars, 0, buflen); |
1049 | return chars; |
1050 | } |
1051 | |
1052 | /** |
1053 | * Returns a copy of a character array subset of the input buffer. |
1054 | * The returned array begins at the <code>beginIndex</code> and |
1055 | * extends to the character at index <code>endIndex - 1</code>. |
1056 | * Thus the length of the substring is <code>endIndex-beginIndex</code>. |
1057 | * This behavior is like |
1058 | * <code>String.substring(beginIndex, endIndex)</code>. |
1059 | * Unicode escape sequences are not translated. |
1060 | * |
1061 | * @param beginIndex the beginning index, inclusive. |
1062 | * @param endIndex the ending index, exclusive. |
1063 | * @throws IndexOutOfBounds if either offset is outside of the |
1064 | * array bounds |
1065 | */ |
1066 | public char[] getRawCharacters(int beginIndex, int endIndex) { |
1067 | int length = endIndex - beginIndex; |
1068 | char[] chars = new char[length]; |
1069 | System.arraycopy(buf, beginIndex, chars, 0, length); |
1070 | return chars; |
1071 | } |
1072 | |
/** The kinds of comment that nextToken() reports to processComment(). */
public enum CommentStyle {
    /** A comment introduced by two slashes. */
    LINE,
    /** A slash-star comment whose opening is not directly followed by a second star. */
    BLOCK,
    /** A slash-star comment whose opening is directly followed by a second star. */
    JAVADOC
}
1078 | |
1079 | /** |
1080 | * Called when a complete comment has been scanned. pos and endPos |
1081 | * will mark the comment boundary. |
1082 | */ |
1083 | protected void processComment(CommentStyle style) { |
1084 | if (scannerDebug) |
1085 | System.out.println("processComment(" + pos |
1086 | + "," + endPos + "," + style + ")=|" |
1087 | + new String(getRawCharacters(pos, endPos)) |
1088 | + "|"); |
1089 | } |
1090 | |
1091 | /** |
1092 | * Called when a complete whitespace run has been scanned. pos and endPos |
1093 | * will mark the whitespace boundary. |
1094 | */ |
1095 | protected void processWhiteSpace() { |
1096 | if (scannerDebug) |
1097 | System.out.println("processWhitespace(" + pos |
1098 | + "," + endPos + ")=|" + |
1099 | new String(getRawCharacters(pos, endPos)) |
1100 | + "|"); |
1101 | } |
1102 | |
1103 | /** |
1104 | * Called when a line terminator has been processed. |
1105 | */ |
1106 | protected void processLineTerminator() { |
1107 | if (scannerDebug) |
1108 | System.out.println("processTerminator(" + pos |
1109 | + "," + endPos + ")=|" + |
1110 | new String(getRawCharacters(pos, endPos)) |
1111 | + "|"); |
1112 | } |
1113 | |
1114 | /** Build a map for translating between line numbers and |
1115 | * positions in the input. |
1116 | * |
1117 | * @return a LineMap */ |
1118 | public Position.LineMap getLineMap() { |
1119 | return Position.makeLineMap(buf, buflen, false); |
1120 | } |
1121 | |
1122 | } |