| 1 | /* |
| 2 | * Copyright 1999-2006 Sun Microsystems, Inc. All Rights Reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. Sun designates this |
| 8 | * particular file as subject to the "Classpath" exception as provided |
| 9 | * by Sun in the LICENSE file that accompanied this code. |
| 10 | * |
| 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 14 | * version 2 for more details (a copy is included in the LICENSE file that |
| 15 | * accompanied this code). |
| 16 | * |
| 17 | * You should have received a copy of the GNU General Public License version |
| 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 20 | * |
| 21 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
| 22 | * CA 95054 USA or visit www.sun.com if you need additional information or |
| 23 | * have any questions. |
| 24 | */ |
| 25 | |
| 26 | package com.sun.tools.javac.parser; |
| 27 | |
| 28 | import java.io.*; |
| 29 | import java.nio.*; |
| 30 | import java.nio.ByteBuffer; |
| 31 | import java.nio.charset.*; |
| 32 | import java.nio.channels.*; |
| 33 | import java.util.regex.*; |
| 34 | |
| 35 | import com.sun.tools.javac.util.*; |
| 36 | |
| 37 | import com.sun.tools.javac.code.Source; |
| 38 | |
| 39 | import static com.sun.tools.javac.parser.Token.*; |
| 40 | import static com.sun.tools.javac.util.LayoutCharacters.*; |
| 41 | |
| 42 | /** The lexical analyzer maps an input stream consisting of |
| 43 | * ASCII characters and Unicode escapes into a token sequence. |
| 44 | * |
| 45 | * <p><b>This is NOT part of any API supported by Sun Microsystems. If |
| 46 | * you write code that depends on this, you do so at your own risk. |
| 47 | * This code and its internal interfaces are subject to change or |
| 48 | * deletion without notice.</b> |
| 49 | */ |
| 50 | public class Scanner implements Lexer { |
| 51 | |
/** Debug switch: when true, nextToken() and the process* callbacks echo
 *  the position range and raw text they just scanned to System.out.
 */
private static boolean scannerDebug = false;
| 53 | |
| 54 | /** A factory for creating scanners. */ |
| 55 | public static class Factory { |
| 56 | /** The context key for the scanner factory. */ |
| 57 | public static final Context.Key<Scanner.Factory> scannerFactoryKey = |
| 58 | new Context.Key<Scanner.Factory>(); |
| 59 | |
| 60 | /** Get the Factory instance for this context. */ |
| 61 | public static Factory instance(Context context) { |
| 62 | Factory instance = context.get(scannerFactoryKey); |
| 63 | if (instance == null) |
| 64 | instance = new Factory(context); |
| 65 | return instance; |
| 66 | } |
| 67 | |
| 68 | final Log log; |
| 69 | final Name.Table names; |
| 70 | final Source source; |
| 71 | final Keywords keywords; |
| 72 | |
| 73 | /** Create a new scanner factory. */ |
| 74 | protected Factory(Context context) { |
| 75 | context.put(scannerFactoryKey, this); |
| 76 | this.log = Log.instance(context); |
| 77 | this.names = Name.Table.instance(context); |
| 78 | this.source = Source.instance(context); |
| 79 | this.keywords = Keywords.instance(context); |
| 80 | } |
| 81 | |
| 82 | public Scanner newScanner(CharSequence input) { |
| 83 | if (input instanceof CharBuffer) { |
| 84 | return new Scanner(this, (CharBuffer)input); |
| 85 | } else { |
| 86 | char[] array = input.toString().toCharArray(); |
| 87 | return newScanner(array, array.length); |
| 88 | } |
| 89 | } |
| 90 | |
| 91 | public Scanner newScanner(char[] input, int inputLength) { |
| 92 | return new Scanner(this, input, inputLength); |
| 93 | } |
| 94 | } |
| 95 | |
/* Output variables; set by nextToken():
 */

/** The token, set by nextToken().
 */
private Token token;

/** Allow hex floating-point literals.
 *  Initialized from the factory's source level; also switched on after
 *  the first "unsupported.fp.lit" error so that error is reported once.
 */
private boolean allowHexFloats;

/** The token's position, 0-based offset from beginning of text.
 */
private int pos;

/** Character position just after the last character of the token.
 */
private int endPos;

/** The value endPos had for the previous token, i.e. the position just
 *  after that token's last character.
 */
private int prevEndPos;

/** The position where a lexical error occurred;
 *  Position.NOPOS until an error has been recorded.
 */
private int errPos = Position.NOPOS;

/** The name of an identifier or token:
 */
private Name name;

/** The radix of a numeric literal token.
 */
private int radix;

/** Has a @deprecated been encountered in last doc comment?
 *  This needs to be reset by the client.
 */
protected boolean deprecatedFlag = false;

/** A character buffer for literals, and the number of characters
 *  currently stored in it. The buffer is doubled by putChar when full.
 */
private char[] sbuf = new char[128];
private int sp;

/** The input buffer, the index of the next character to be read,
 *  and the index of one past the last character in the buffer.
 *  eofPos holds the input length that was passed to the constructor;
 *  pos and bp are set to it when the EOF token is produced.
 */
private char[] buf;
private int bp;
private int buflen;
private int eofPos;

/** The current character.
 */
private char ch;

/** The buffer index of the last converted unicode character;
 *  -1 while no unicode escape has been converted.
 */
private int unicodeConversionBp = -1;

/** The log to be used for error reporting.
 */
private final Log log;

/** The name table. */
private final Name.Table names;

/** The keyword table. */
private final Keywords keywords;
| 166 | |
/** Shared constructor prologue: copy the services and the hex-float
 *  setting from the creating factory.
 */
private Scanner(Factory fac) {
    this.allowHexFloats = fac.source.allowHexFloats();
    this.keywords = fac.keywords;
    this.names = fac.names;
    this.log = fac.log;
}
| 174 | |
| 175 | private static final boolean hexFloatsWork = hexFloatsWork(); |
| 176 | private static boolean hexFloatsWork() { |
| 177 | try { |
| 178 | Float.valueOf("0x1.0p1"); |
| 179 | return true; |
| 180 | } catch (NumberFormatException ex) { |
| 181 | return false; |
| 182 | } |
| 183 | } |
| 184 | |
/** Create a scanner from the input buffer. buffer must implement
 *  array() and compact(), and remaining() must be less than limit().
 *  The buffer's contents are obtained through JavacFileManager.toArray
 *  and buffer.limit() is used as the input length.
 */
protected Scanner(Factory fac, CharBuffer buffer) {
    this(fac, JavacFileManager.toArray(buffer), buffer.limit());
}
| 191 | |
| 192 | /** |
| 193 | * Create a scanner from the input array. This method might |
| 194 | * modify the array. To avoid copying the input array, ensure |
| 195 | * that {@code inputLength < input.length} or |
| 196 | * {@code input[input.length -1]} is a white space character. |
| 197 | * |
| 198 | * @param fac the factory which created this Scanner |
| 199 | * @param input the input, might be modified |
| 200 | * @param inputLength the size of the input. |
| 201 | * Must be positive and less than or equal to input.length. |
| 202 | */ |
| 203 | protected Scanner(Factory fac, char[] input, int inputLength) { |
| 204 | this(fac); |
| 205 | eofPos = inputLength; |
| 206 | if (inputLength == input.length) { |
| 207 | if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) { |
| 208 | inputLength--; |
| 209 | } else { |
| 210 | char[] newInput = new char[inputLength + 1]; |
| 211 | System.arraycopy(input, 0, newInput, 0, input.length); |
| 212 | input = newInput; |
| 213 | } |
| 214 | } |
| 215 | buf = input; |
| 216 | buflen = inputLength; |
| 217 | buf[buflen] = EOI; |
| 218 | bp = -1; |
| 219 | scanChar(); |
| 220 | } |
| 221 | |
| 222 | /** Report an error at the given position using the provided arguments. |
| 223 | */ |
| 224 | private void lexError(int pos, String key, Object... args) { |
| 225 | log.error(pos, key, args); |
| 226 | token = ERROR; |
| 227 | errPos = pos; |
| 228 | } |
| 229 | |
| 230 | /** Report an error at the current token position using the provided |
| 231 | * arguments. |
| 232 | */ |
| 233 | private void lexError(String key, Object... args) { |
| 234 | lexError(pos, key, args); |
| 235 | } |
| 236 | |
| 237 | /** Convert an ASCII digit from its base (8, 10, or 16) |
| 238 | * to its value. |
| 239 | */ |
| 240 | private int digit(int base) { |
| 241 | char c = ch; |
| 242 | int result = Character.digit(c, base); |
| 243 | if (result >= 0 && c > 0x7f) { |
| 244 | lexError(pos+1, "illegal.nonascii.digit"); |
| 245 | ch = "0123456789abcdef".charAt(result); |
| 246 | } |
| 247 | return result; |
| 248 | } |
| 249 | |
| 250 | /** Convert unicode escape; bp points to initial '\' character |
| 251 | * (Spec 3.3). |
| 252 | */ |
| 253 | private void convertUnicode() { |
| 254 | if (ch == '\\' && unicodeConversionBp != bp) { |
| 255 | bp++; ch = buf[bp]; |
| 256 | if (ch == 'u') { |
| 257 | do { |
| 258 | bp++; ch = buf[bp]; |
| 259 | } while (ch == 'u'); |
| 260 | int limit = bp + 3; |
| 261 | if (limit < buflen) { |
| 262 | int d = digit(16); |
| 263 | int code = d; |
| 264 | while (bp < limit && d >= 0) { |
| 265 | bp++; ch = buf[bp]; |
| 266 | d = digit(16); |
| 267 | code = (code << 4) + d; |
| 268 | } |
| 269 | if (d >= 0) { |
| 270 | ch = (char)code; |
| 271 | unicodeConversionBp = bp; |
| 272 | return; |
| 273 | } |
| 274 | } |
| 275 | lexError(bp, "illegal.unicode.esc"); |
| 276 | } else { |
| 277 | bp--; |
| 278 | ch = '\\'; |
| 279 | } |
| 280 | } |
| 281 | } |
| 282 | |
| 283 | /** Read next character. |
| 284 | */ |
| 285 | private void scanChar() { |
| 286 | ch = buf[++bp]; |
| 287 | if (ch == '\\') { |
| 288 | convertUnicode(); |
| 289 | } |
| 290 | } |
| 291 | |
| 292 | /** Read next character in comment, skipping over double '\' characters. |
| 293 | */ |
| 294 | private void scanCommentChar() { |
| 295 | scanChar(); |
| 296 | if (ch == '\\') { |
| 297 | if (buf[bp+1] == '\\' && unicodeConversionBp != bp) { |
| 298 | bp++; |
| 299 | } else { |
| 300 | convertUnicode(); |
| 301 | } |
| 302 | } |
| 303 | } |
| 304 | |
| 305 | /** Append a character to sbuf. |
| 306 | */ |
| 307 | private void putChar(char ch) { |
| 308 | if (sp == sbuf.length) { |
| 309 | char[] newsbuf = new char[sbuf.length * 2]; |
| 310 | System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length); |
| 311 | sbuf = newsbuf; |
| 312 | } |
| 313 | sbuf[sp++] = ch; |
| 314 | } |
| 315 | |
| 316 | /** For debugging purposes: print character. |
| 317 | */ |
| 318 | private void dch() { |
| 319 | System.err.print(ch); System.out.flush(); |
| 320 | } |
| 321 | |
| 322 | /** Read next character in character or string literal and copy into sbuf. |
| 323 | */ |
| 324 | private void scanLitChar() { |
| 325 | if (ch == '\\') { |
| 326 | if (buf[bp+1] == '\\' && unicodeConversionBp != bp) { |
| 327 | bp++; |
| 328 | putChar('\\'); |
| 329 | scanChar(); |
| 330 | } else { |
| 331 | scanChar(); |
| 332 | switch (ch) { |
| 333 | case '0': case '1': case '2': case '3': |
| 334 | case '4': case '5': case '6': case '7': |
| 335 | char leadch = ch; |
| 336 | int oct = digit(8); |
| 337 | scanChar(); |
| 338 | if ('0' <= ch && ch <= '7') { |
| 339 | oct = oct * 8 + digit(8); |
| 340 | scanChar(); |
| 341 | if (leadch <= '3' && '0' <= ch && ch <= '7') { |
| 342 | oct = oct * 8 + digit(8); |
| 343 | scanChar(); |
| 344 | } |
| 345 | } |
| 346 | putChar((char)oct); |
| 347 | break; |
| 348 | case 'b': |
| 349 | putChar('\b'); scanChar(); break; |
| 350 | case 't': |
| 351 | putChar('\t'); scanChar(); break; |
| 352 | case 'n': |
| 353 | putChar('\n'); scanChar(); break; |
| 354 | case 'f': |
| 355 | putChar('\f'); scanChar(); break; |
| 356 | case 'r': |
| 357 | putChar('\r'); scanChar(); break; |
| 358 | case '\'': |
| 359 | putChar('\''); scanChar(); break; |
| 360 | case '\"': |
| 361 | putChar('\"'); scanChar(); break; |
| 362 | case '\\': |
| 363 | putChar('\\'); scanChar(); break; |
| 364 | default: |
| 365 | lexError(bp, "illegal.esc.char"); |
| 366 | } |
| 367 | } |
| 368 | } else if (bp != buflen) { |
| 369 | putChar(ch); scanChar(); |
| 370 | } |
| 371 | } |
| 372 | |
| 373 | /** Read fractional part of hexadecimal floating point number. |
| 374 | */ |
| 375 | private void scanHexExponentAndSuffix() { |
| 376 | if (ch == 'p' || ch == 'P') { |
| 377 | putChar(ch); |
| 378 | scanChar(); |
| 379 | if (ch == '+' || ch == '-') { |
| 380 | putChar(ch); |
| 381 | scanChar(); |
| 382 | } |
| 383 | if ('0' <= ch && ch <= '9') { |
| 384 | do { |
| 385 | putChar(ch); |
| 386 | scanChar(); |
| 387 | } while ('0' <= ch && ch <= '9'); |
| 388 | if (!allowHexFloats) { |
| 389 | lexError("unsupported.fp.lit"); |
| 390 | allowHexFloats = true; |
| 391 | } |
| 392 | else if (!hexFloatsWork) |
| 393 | lexError("unsupported.cross.fp.lit"); |
| 394 | } else |
| 395 | lexError("malformed.fp.lit"); |
| 396 | } else { |
| 397 | lexError("malformed.fp.lit"); |
| 398 | } |
| 399 | if (ch == 'f' || ch == 'F') { |
| 400 | putChar(ch); |
| 401 | scanChar(); |
| 402 | token = FLOATLITERAL; |
| 403 | } else { |
| 404 | if (ch == 'd' || ch == 'D') { |
| 405 | putChar(ch); |
| 406 | scanChar(); |
| 407 | } |
| 408 | token = DOUBLELITERAL; |
| 409 | } |
| 410 | } |
| 411 | |
| 412 | /** Read fractional part of floating point number. |
| 413 | */ |
| 414 | private void scanFraction() { |
| 415 | while (digit(10) >= 0) { |
| 416 | putChar(ch); |
| 417 | scanChar(); |
| 418 | } |
| 419 | int sp1 = sp; |
| 420 | if (ch == 'e' || ch == 'E') { |
| 421 | putChar(ch); |
| 422 | scanChar(); |
| 423 | if (ch == '+' || ch == '-') { |
| 424 | putChar(ch); |
| 425 | scanChar(); |
| 426 | } |
| 427 | if ('0' <= ch && ch <= '9') { |
| 428 | do { |
| 429 | putChar(ch); |
| 430 | scanChar(); |
| 431 | } while ('0' <= ch && ch <= '9'); |
| 432 | return; |
| 433 | } |
| 434 | lexError("malformed.fp.lit"); |
| 435 | sp = sp1; |
| 436 | } |
| 437 | } |
| 438 | |
| 439 | /** Read fractional part and 'd' or 'f' suffix of floating point number. |
| 440 | */ |
| 441 | private void scanFractionAndSuffix() { |
| 442 | this.radix = 10; |
| 443 | scanFraction(); |
| 444 | if (ch == 'f' || ch == 'F') { |
| 445 | putChar(ch); |
| 446 | scanChar(); |
| 447 | token = FLOATLITERAL; |
| 448 | } else { |
| 449 | if (ch == 'd' || ch == 'D') { |
| 450 | putChar(ch); |
| 451 | scanChar(); |
| 452 | } |
| 453 | token = DOUBLELITERAL; |
| 454 | } |
| 455 | } |
| 456 | |
| 457 | /** Read fractional part and 'd' or 'f' suffix of floating point number. |
| 458 | */ |
| 459 | private void scanHexFractionAndSuffix(boolean seendigit) { |
| 460 | this.radix = 16; |
| 461 | assert ch == '.'; |
| 462 | putChar(ch); |
| 463 | scanChar(); |
| 464 | while (digit(16) >= 0) { |
| 465 | seendigit = true; |
| 466 | putChar(ch); |
| 467 | scanChar(); |
| 468 | } |
| 469 | if (!seendigit) |
| 470 | lexError("invalid.hex.number"); |
| 471 | else |
| 472 | scanHexExponentAndSuffix(); |
| 473 | } |
| 474 | |
| 475 | /** Read a number. |
| 476 | * @param radix The radix of the number; one of 8, 10, 16. |
| 477 | */ |
| 478 | private void scanNumber(int radix) { |
| 479 | this.radix = radix; |
| 480 | // for octal, allow base-10 digit in case it's a float literal |
| 481 | int digitRadix = (radix <= 10) ? 10 : 16; |
| 482 | boolean seendigit = false; |
| 483 | while (digit(digitRadix) >= 0) { |
| 484 | seendigit = true; |
| 485 | putChar(ch); |
| 486 | scanChar(); |
| 487 | } |
| 488 | if (radix == 16 && ch == '.') { |
| 489 | scanHexFractionAndSuffix(seendigit); |
| 490 | } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) { |
| 491 | scanHexExponentAndSuffix(); |
| 492 | } else if (radix <= 10 && ch == '.') { |
| 493 | putChar(ch); |
| 494 | scanChar(); |
| 495 | scanFractionAndSuffix(); |
| 496 | } else if (radix <= 10 && |
| 497 | (ch == 'e' || ch == 'E' || |
| 498 | ch == 'f' || ch == 'F' || |
| 499 | ch == 'd' || ch == 'D')) { |
| 500 | scanFractionAndSuffix(); |
| 501 | } else { |
| 502 | if (ch == 'l' || ch == 'L') { |
| 503 | scanChar(); |
| 504 | token = LONGLITERAL; |
| 505 | } else { |
| 506 | token = INTLITERAL; |
| 507 | } |
| 508 | } |
| 509 | } |
| 510 | |
| 511 | /** Read an identifier. |
| 512 | */ |
| 513 | private void scanIdent() { |
| 514 | boolean isJavaIdentifierPart; |
| 515 | char high; |
| 516 | do { |
| 517 | if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch; |
| 518 | // optimization, was: putChar(ch); |
| 519 | |
| 520 | scanChar(); |
| 521 | switch (ch) { |
| 522 | case 'A': case 'B': case 'C': case 'D': case 'E': |
| 523 | case 'F': case 'G': case 'H': case 'I': case 'J': |
| 524 | case 'K': case 'L': case 'M': case 'N': case 'O': |
| 525 | case 'P': case 'Q': case 'R': case 'S': case 'T': |
| 526 | case 'U': case 'V': case 'W': case 'X': case 'Y': |
| 527 | case 'Z': |
| 528 | case 'a': case 'b': case 'c': case 'd': case 'e': |
| 529 | case 'f': case 'g': case 'h': case 'i': case 'j': |
| 530 | case 'k': case 'l': case 'm': case 'n': case 'o': |
| 531 | case 'p': case 'q': case 'r': case 's': case 't': |
| 532 | case 'u': case 'v': case 'w': case 'x': case 'y': |
| 533 | case 'z': |
| 534 | case '$': case '_': |
| 535 | case '0': case '1': case '2': case '3': case '4': |
| 536 | case '5': case '6': case '7': case '8': case '9': |
| 537 | case '\u0000': case '\u0001': case '\u0002': case '\u0003': |
| 538 | case '\u0004': case '\u0005': case '\u0006': case '\u0007': |
| 539 | case '\u0008': case '\u000E': case '\u000F': case '\u0010': |
| 540 | case '\u0011': case '\u0012': case '\u0013': case '\u0014': |
| 541 | case '\u0015': case '\u0016': case '\u0017': |
| 542 | case '\u0018': case '\u0019': case '\u001B': |
| 543 | case '\u007F': |
| 544 | break; |
| 545 | case '\u001A': // EOI is also a legal identifier part |
| 546 | if (bp >= buflen) { |
| 547 | name = names.fromChars(sbuf, 0, sp); |
| 548 | token = keywords.key(name); |
| 549 | return; |
| 550 | } |
| 551 | break; |
| 552 | default: |
| 553 | if (ch < '\u0080') { |
| 554 | // all ASCII range chars already handled, above |
| 555 | isJavaIdentifierPart = false; |
| 556 | } else { |
| 557 | high = scanSurrogates(); |
| 558 | if (high != 0) { |
| 559 | if (sp == sbuf.length) { |
| 560 | putChar(high); |
| 561 | } else { |
| 562 | sbuf[sp++] = high; |
| 563 | } |
| 564 | isJavaIdentifierPart = Character.isJavaIdentifierPart( |
| 565 | Character.toCodePoint(high, ch)); |
| 566 | } else { |
| 567 | isJavaIdentifierPart = Character.isJavaIdentifierPart(ch); |
| 568 | } |
| 569 | } |
| 570 | if (!isJavaIdentifierPart) { |
| 571 | name = names.fromChars(sbuf, 0, sp); |
| 572 | token = keywords.key(name); |
| 573 | return; |
| 574 | } |
| 575 | } |
| 576 | } while (true); |
| 577 | } |
| 578 | |
| 579 | /** Are surrogates supported? |
| 580 | */ |
| 581 | final static boolean surrogatesSupported = surrogatesSupported(); |
| 582 | private static boolean surrogatesSupported() { |
| 583 | try { |
| 584 | Character.isHighSurrogate('a'); |
| 585 | return true; |
| 586 | } catch (NoSuchMethodError ex) { |
| 587 | return false; |
| 588 | } |
| 589 | } |
| 590 | |
| 591 | /** Scan surrogate pairs. If 'ch' is a high surrogate and |
| 592 | * the next character is a low surrogate, then put the low |
| 593 | * surrogate in 'ch', and return the high surrogate. |
| 594 | * otherwise, just return 0. |
| 595 | */ |
| 596 | private char scanSurrogates() { |
| 597 | if (surrogatesSupported && Character.isHighSurrogate(ch)) { |
| 598 | char high = ch; |
| 599 | |
| 600 | scanChar(); |
| 601 | |
| 602 | if (Character.isLowSurrogate(ch)) { |
| 603 | return high; |
| 604 | } |
| 605 | |
| 606 | ch = high; |
| 607 | } |
| 608 | |
| 609 | return 0; |
| 610 | } |
| 611 | |
| 612 | /** Return true if ch can be part of an operator. |
| 613 | */ |
| 614 | private boolean isSpecial(char ch) { |
| 615 | switch (ch) { |
| 616 | case '!': case '%': case '&': case '*': case '?': |
| 617 | case '+': case '-': case ':': case '<': case '=': |
| 618 | case '>': case '^': case '|': case '~': |
| 619 | case '@': |
| 620 | return true; |
| 621 | default: |
| 622 | return false; |
| 623 | } |
| 624 | } |
| 625 | |
| 626 | /** Read longest possible sequence of special characters and convert |
| 627 | * to token. |
| 628 | */ |
| 629 | private void scanOperator() { |
| 630 | while (true) { |
| 631 | putChar(ch); |
| 632 | Name newname = names.fromChars(sbuf, 0, sp); |
| 633 | if (keywords.key(newname) == IDENTIFIER) { |
| 634 | sp--; |
| 635 | break; |
| 636 | } |
| 637 | name = newname; |
| 638 | token = keywords.key(newname); |
| 639 | scanChar(); |
| 640 | if (!isSpecial(ch)) break; |
| 641 | } |
| 642 | } |
| 643 | |
| 644 | /** |
| 645 | * Scan a documention comment; determine if a deprecated tag is present. |
| 646 | * Called once the initial /, * have been skipped, positioned at the second * |
| 647 | * (which is treated as the beginning of the first line). |
| 648 | * Stops positioned at the closing '/'. |
| 649 | */ |
| 650 | @SuppressWarnings("fallthrough") |
| 651 | private void scanDocComment() { |
| 652 | boolean deprecatedPrefix = false; |
| 653 | |
| 654 | forEachLine: |
| 655 | while (bp < buflen) { |
| 656 | |
| 657 | // Skip optional WhiteSpace at beginning of line |
| 658 | while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) { |
| 659 | scanCommentChar(); |
| 660 | } |
| 661 | |
| 662 | // Skip optional consecutive Stars |
| 663 | while (bp < buflen && ch == '*') { |
| 664 | scanCommentChar(); |
| 665 | if (ch == '/') { |
| 666 | return; |
| 667 | } |
| 668 | } |
| 669 | |
| 670 | // Skip optional WhiteSpace after Stars |
| 671 | while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) { |
| 672 | scanCommentChar(); |
| 673 | } |
| 674 | |
| 675 | deprecatedPrefix = false; |
| 676 | // At beginning of line in the JavaDoc sense. |
| 677 | if (bp < buflen && ch == '@' && !deprecatedFlag) { |
| 678 | scanCommentChar(); |
| 679 | if (bp < buflen && ch == 'd') { |
| 680 | scanCommentChar(); |
| 681 | if (bp < buflen && ch == 'e') { |
| 682 | scanCommentChar(); |
| 683 | if (bp < buflen && ch == 'p') { |
| 684 | scanCommentChar(); |
| 685 | if (bp < buflen && ch == 'r') { |
| 686 | scanCommentChar(); |
| 687 | if (bp < buflen && ch == 'e') { |
| 688 | scanCommentChar(); |
| 689 | if (bp < buflen && ch == 'c') { |
| 690 | scanCommentChar(); |
| 691 | if (bp < buflen && ch == 'a') { |
| 692 | scanCommentChar(); |
| 693 | if (bp < buflen && ch == 't') { |
| 694 | scanCommentChar(); |
| 695 | if (bp < buflen && ch == 'e') { |
| 696 | scanCommentChar(); |
| 697 | if (bp < buflen && ch == 'd') { |
| 698 | deprecatedPrefix = true; |
| 699 | scanCommentChar(); |
| 700 | }}}}}}}}}}} |
| 701 | if (deprecatedPrefix && bp < buflen) { |
| 702 | if (Character.isWhitespace(ch)) { |
| 703 | deprecatedFlag = true; |
| 704 | } else if (ch == '*') { |
| 705 | scanCommentChar(); |
| 706 | if (ch == '/') { |
| 707 | deprecatedFlag = true; |
| 708 | return; |
| 709 | } |
| 710 | } |
| 711 | } |
| 712 | |
| 713 | // Skip rest of line |
| 714 | while (bp < buflen) { |
| 715 | switch (ch) { |
| 716 | case '*': |
| 717 | scanCommentChar(); |
| 718 | if (ch == '/') { |
| 719 | return; |
| 720 | } |
| 721 | break; |
| 722 | case CR: // (Spec 3.4) |
| 723 | scanCommentChar(); |
| 724 | if (ch != LF) { |
| 725 | continue forEachLine; |
| 726 | } |
| 727 | /* fall through to LF case */ |
| 728 | case LF: // (Spec 3.4) |
| 729 | scanCommentChar(); |
| 730 | continue forEachLine; |
| 731 | default: |
| 732 | scanCommentChar(); |
| 733 | } |
| 734 | } // rest of line |
| 735 | } // forEachLine |
| 736 | return; |
| 737 | } |
| 738 | |
| 739 | /** The value of a literal token, recorded as a string. |
| 740 | * For integers, leading 0x and 'l' suffixes are suppressed. |
| 741 | */ |
| 742 | public String stringVal() { |
| 743 | return new String(sbuf, 0, sp); |
| 744 | } |
| 745 | |
/** Read the next token.
 *  White space, line terminators and comments are skipped (each is
 *  reported through the matching process* callback) until a real token,
 *  an error or the end of input is found. On return, token, pos and
 *  endPos describe what was read; name is set for identifiers, keywords
 *  and operators, sbuf/sp hold literal text, and radix is set for
 *  numeric literals. prevEndPos receives the previous value of endPos.
 */
public void nextToken() {

    try {
        prevEndPos = endPos;
        // Start with an empty literal buffer.
        sp = 0;

        // Each iteration either returns with a token or, after skipping
        // white space or a comment, breaks out of the switch to try again.
        while (true) {
            pos = bp;
            switch (ch) {
            case ' ': // (Spec 3.6)
            case '\t': // (Spec 3.6)
            case FF: // (Spec 3.6)
                do {
                    scanChar();
                } while (ch == ' ' || ch == '\t' || ch == FF);
                endPos = bp;
                processWhiteSpace();
                break;
            case LF: // (Spec 3.4)
                scanChar();
                endPos = bp;
                processLineTerminator();
                break;
            case CR: // (Spec 3.4)
                scanChar();
                // CR LF is a single line terminator.
                if (ch == LF) {
                    scanChar();
                }
                endPos = bp;
                processLineTerminator();
                break;
            case 'A': case 'B': case 'C': case 'D': case 'E':
            case 'F': case 'G': case 'H': case 'I': case 'J':
            case 'K': case 'L': case 'M': case 'N': case 'O':
            case 'P': case 'Q': case 'R': case 'S': case 'T':
            case 'U': case 'V': case 'W': case 'X': case 'Y':
            case 'Z':
            case 'a': case 'b': case 'c': case 'd': case 'e':
            case 'f': case 'g': case 'h': case 'i': case 'j':
            case 'k': case 'l': case 'm': case 'n': case 'o':
            case 'p': case 'q': case 'r': case 's': case 't':
            case 'u': case 'v': case 'w': case 'x': case 'y':
            case 'z':
            case '$': case '_':
                scanIdent();
                return;
            case '0':
                scanChar();
                if (ch == 'x' || ch == 'X') {
                    // Hexadecimal: the "0x" prefix is not copied into sbuf.
                    scanChar();
                    if (ch == '.') {
                        scanHexFractionAndSuffix(false);
                    } else if (digit(16) < 0) {
                        lexError("invalid.hex.number");
                    } else {
                        scanNumber(16);
                    }
                } else {
                    // Octal, or a decimal floating point literal starting with 0.
                    putChar('0');
                    scanNumber(8);
                }
                return;
            case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                scanNumber(10);
                return;
            case '.':
                scanChar();
                if ('0' <= ch && ch <= '9') {
                    // Floating point literal without an integer part.
                    putChar('.');
                    scanFractionAndSuffix();
                } else if (ch == '.') {
                    // Two dots must be followed by a third to form an ellipsis.
                    putChar('.'); putChar('.');
                    scanChar();
                    if (ch == '.') {
                        scanChar();
                        putChar('.');
                        token = ELLIPSIS;
                    } else {
                        lexError("malformed.fp.lit");
                    }
                } else {
                    token = DOT;
                }
                return;
            case ',':
                scanChar(); token = COMMA; return;
            case ';':
                scanChar(); token = SEMI; return;
            case '(':
                scanChar(); token = LPAREN; return;
            case ')':
                scanChar(); token = RPAREN; return;
            case '[':
                scanChar(); token = LBRACKET; return;
            case ']':
                scanChar(); token = RBRACKET; return;
            case '{':
                scanChar(); token = LBRACE; return;
            case '}':
                scanChar(); token = RBRACE; return;
            case '/':
                scanChar();
                if (ch == '/') {
                    // Line comment: runs up to, not including, the line terminator.
                    do {
                        scanCommentChar();
                    } while (ch != CR && ch != LF && bp < buflen);
                    // The callback is skipped when the comment runs into the end of input.
                    if (bp < buflen) {
                        endPos = bp;
                        processComment(CommentStyle.LINE);
                    }
                    break;
                } else if (ch == '*') {
                    scanChar();
                    CommentStyle style;
                    if (ch == '*') {
                        // A second star makes this a documentation comment.
                        style = CommentStyle.JAVADOC;
                        scanDocComment();
                    } else {
                        style = CommentStyle.BLOCK;
                        while (bp < buflen) {
                            if (ch == '*') {
                                scanChar();
                                if (ch == '/') break;
                            } else {
                                scanCommentChar();
                            }
                        }
                    }
                    // Both scans stop on the closing '/' when the comment is terminated.
                    if (ch == '/') {
                        scanChar();
                        endPos = bp;
                        processComment(style);
                        break;
                    } else {
                        lexError("unclosed.comment");
                        return;
                    }
                } else if (ch == '=') {
                    name = names.slashequals;
                    token = SLASHEQ;
                    scanChar();
                } else {
                    name = names.slash;
                    token = SLASH;
                }
                return;
            case '\'':
                scanChar();
                if (ch == '\'') {
                    lexError("empty.char.lit");
                } else {
                    if (ch == CR || ch == LF)
                        lexError(pos, "illegal.line.end.in.char.lit");
                    scanLitChar();
                    if (ch == '\'') {
                        scanChar();
                        token = CHARLITERAL;
                    } else {
                        lexError(pos, "unclosed.char.lit");
                    }
                }
                return;
            case '\"':
                scanChar();
                // A string literal may not span a line terminator.
                while (ch != '\"' && ch != CR && ch != LF && bp < buflen)
                    scanLitChar();
                if (ch == '\"') {
                    token = STRINGLITERAL;
                    scanChar();
                } else {
                    lexError(pos, "unclosed.str.lit");
                }
                return;
            default:
                if (isSpecial(ch)) {
                    scanOperator();
                } else {
                    boolean isJavaIdentifierStart;
                    if (ch < '\u0080') {
                        // all ASCII range chars already handled, above
                        isJavaIdentifierStart = false;
                    } else {
                        char high = scanSurrogates();
                        if (high != 0) {
                            // Supplementary character: keep the high surrogate
                            // in sbuf; scanIdent appends the low one.
                            if (sp == sbuf.length) {
                                putChar(high);
                            } else {
                                sbuf[sp++] = high;
                            }

                            isJavaIdentifierStart = Character.isJavaIdentifierStart(
                                Character.toCodePoint(high, ch));
                        } else {
                            isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);
                        }
                    }
                    if (isJavaIdentifierStart) {
                        scanIdent();
                    } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5
                        // End of input; an EOI character in the last position
                        // is treated as the end as well.
                        token = EOF;
                        pos = bp = eofPos;
                    } else {
                        lexError("illegal.char", String.valueOf((int)ch));
                        scanChar();
                    }
                }
                return;
            }
        }
    } finally {
        // Runs on every exit path, so endPos is always one past the
        // last character consumed.
        endPos = bp;
        if (scannerDebug)
            System.out.println("nextToken(" + pos
                               + "," + endPos + ")=|" +
                               new String(getRawCharacters(pos, endPos))
                               + "|");
    }
}
| 967 | |
/** Return the current token, set by nextToken().
 */
public Token token() {
    return token;
}

/** Sets the current token. No other scanner state is changed.
 */
public void token(Token token) {
    this.token = token;
}

/** Return the current token's position: a 0-based
 *  offset from beginning of the raw input stream
 *  (before unicode translation)
 */
public int pos() {
    return pos;
}

/** Return the position just after the last character of the current
 *  token (nextToken() sets it to the index of the next unread character).
 */
public int endPos() {
    return endPos;
}

/** Return the value endPos() had for the previous token, i.e. the
 *  position just after that token's last character.
 */
public int prevEndPos() {
    return prevEndPos;
}

/** Return the position where a lexical error occurred;
 *  Position.NOPOS if none has been recorded.
 */
public int errPos() {
    return errPos;
}

/** Set the position where a lexical error occurred;
 */
public void errPos(int pos) {
    errPos = pos;
}

/** Return the name of an identifier or token for the current token.
 */
public Name name() {
    return name;
}

/** Return the radix of a numeric literal token.
 */
public int radix() {
    return radix;
}

/** Has a @deprecated been encountered in last doc comment?
 *  This needs to be reset by client with resetDeprecatedFlag.
 */
public boolean deprecatedFlag() {
    return deprecatedFlag;
}

/** Clear the flag reported by deprecatedFlag().
 */
public void resetDeprecatedFlag() {
    deprecatedFlag = false;
}

/**
 * Returns the documentation string of the current token.
 * This implementation does not keep comment text and always returns null.
 */
public String docComment() {
    return null;
}
| 1041 | |
| 1042 | /** |
| 1043 | * Returns a copy of the input buffer, up to its inputLength. |
| 1044 | * Unicode escape sequences are not translated. |
| 1045 | */ |
| 1046 | public char[] getRawCharacters() { |
| 1047 | char[] chars = new char[buflen]; |
| 1048 | System.arraycopy(buf, 0, chars, 0, buflen); |
| 1049 | return chars; |
| 1050 | } |
| 1051 | |
| 1052 | /** |
| 1053 | * Returns a copy of a character array subset of the input buffer. |
| 1054 | * The returned array begins at the <code>beginIndex</code> and |
| 1055 | * extends to the character at index <code>endIndex - 1</code>. |
| 1056 | * Thus the length of the substring is <code>endIndex-beginIndex</code>. |
| 1057 | * This behavior is like |
| 1058 | * <code>String.substring(beginIndex, endIndex)</code>. |
| 1059 | * Unicode escape sequences are not translated. |
| 1060 | * |
| 1061 | * @param beginIndex the beginning index, inclusive. |
| 1062 | * @param endIndex the ending index, exclusive. |
| 1063 | * @throws IndexOutOfBounds if either offset is outside of the |
| 1064 | * array bounds |
| 1065 | */ |
| 1066 | public char[] getRawCharacters(int beginIndex, int endIndex) { |
| 1067 | int length = endIndex - beginIndex; |
| 1068 | char[] chars = new char[length]; |
| 1069 | System.arraycopy(buf, beginIndex, chars, 0, length); |
| 1070 | return chars; |
| 1071 | } |
| 1072 | |
/** The kinds of comment reported to processComment. */
public enum CommentStyle {
    // A comment introduced by two slashes, running to the end of the line.
    LINE,
    // A comment opened by slash-star whose next character is not a star.
    BLOCK,
    // A comment opened by slash-star-star; scanned by scanDocComment.
    JAVADOC,
}
| 1078 | |
| 1079 | /** |
| 1080 | * Called when a complete comment has been scanned. pos and endPos |
| 1081 | * will mark the comment boundary. |
| 1082 | */ |
| 1083 | protected void processComment(CommentStyle style) { |
| 1084 | if (scannerDebug) |
| 1085 | System.out.println("processComment(" + pos |
| 1086 | + "," + endPos + "," + style + ")=|" |
| 1087 | + new String(getRawCharacters(pos, endPos)) |
| 1088 | + "|"); |
| 1089 | } |
| 1090 | |
| 1091 | /** |
| 1092 | * Called when a complete whitespace run has been scanned. pos and endPos |
| 1093 | * will mark the whitespace boundary. |
| 1094 | */ |
| 1095 | protected void processWhiteSpace() { |
| 1096 | if (scannerDebug) |
| 1097 | System.out.println("processWhitespace(" + pos |
| 1098 | + "," + endPos + ")=|" + |
| 1099 | new String(getRawCharacters(pos, endPos)) |
| 1100 | + "|"); |
| 1101 | } |
| 1102 | |
| 1103 | /** |
| 1104 | * Called when a line terminator has been processed. |
| 1105 | */ |
| 1106 | protected void processLineTerminator() { |
| 1107 | if (scannerDebug) |
| 1108 | System.out.println("processTerminator(" + pos |
| 1109 | + "," + endPos + ")=|" + |
| 1110 | new String(getRawCharacters(pos, endPos)) |
| 1111 | + "|"); |
| 1112 | } |
| 1113 | |
/** Build a map for translating between line numbers and
 *  positions in the input. The map is built by Position.makeLineMap
 *  from the input buffer and its length on every call.
 *
 * @return a LineMap */
public Position.LineMap getLineMap() {
    return Position.makeLineMap(buf, buflen, false);
}
| 1121 | |
| 1122 | } |