| 1 | /* | 
| 2 |  * Copyright 2004-2006 Sun Microsystems, Inc.  All Rights Reserved. | 
| 3 |  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | 
| 4 |  * | 
| 5 |  * This code is free software; you can redistribute it and/or modify it | 
| 6 |  * under the terms of the GNU General Public License version 2 only, as | 
| 7 |  * published by the Free Software Foundation.  Sun designates this | 
| 8 |  * particular file as subject to the "Classpath" exception as provided | 
| 9 |  * by Sun in the LICENSE file that accompanied this code. | 
| 10 |  * | 
| 11 |  * This code is distributed in the hope that it will be useful, but WITHOUT | 
| 12 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | 
| 13 |  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License | 
| 14 |  * version 2 for more details (a copy is included in the LICENSE file that | 
| 15 |  * accompanied this code). | 
| 16 |  * | 
| 17 |  * You should have received a copy of the GNU General Public License version | 
| 18 |  * 2 along with this work; if not, write to the Free Software Foundation, | 
| 19 |  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | 
| 20 |  * | 
| 21 |  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | 
| 22 |  * CA 95054 USA or visit www.sun.com if you need additional information or | 
| 23 |  * have any questions. | 
| 24 |  */ | 
| 25 |   | 
| 26 | package com.sun.tools.javac.parser; | 
| 27 |   | 
| 28 | import java.io.*; | 
| 29 | import java.nio.*; | 
| 30 |   | 
| 31 | import com.sun.tools.javac.util.*; | 
| 32 | import static com.sun.tools.javac.util.LayoutCharacters.*; | 
| 33 |   | 
| 34 | /** An extension to the base lexical analyzer that captures | 
| 35 |  *  and processes the contents of doc comments.  It does so by | 
| 36 |  *  translating Unicode escape sequences and by stripping the | 
| 37 |  *  leading whitespace and stars from each line of the comment. | 
| 38 |  * | 
| 39 |  *  <p><b>This is NOT part of any API supported by Sun Microsystems.  If | 
| 40 |  *  you write code that depends on this, you do so at your own risk. | 
| 41 |  *  This code and its internal interfaces are subject to change or | 
| 42 |  *  deletion without notice.</b> | 
| 43 |  */ | 
| 44 | public class DocCommentScanner extends Scanner { | 
| 45 |   | 
| 46 |     /** A factory for creating scanners. */ | 
| 47 |     public static class Factory extends Scanner.Factory { | 
| 48 |   | 
| 49 |         public static void preRegister(final Context context) { | 
| 50 |             context.put(scannerFactoryKey, new Context.Factory<Scanner.Factory>() { | 
| 51 |                 public Factory make() { | 
| 52 |                     return new Factory(context); | 
| 53 |                 } | 
| 54 |             }); | 
| 55 |         } | 
| 56 |   | 
| 57 |         /** Create a new scanner factory. */ | 
| 58 |         protected Factory(Context context) { | 
| 59 |             super(context); | 
| 60 |         } | 
| 61 |   | 
| 62 |         @Override | 
| 63 |         public Scanner newScanner(CharSequence input) { | 
| 64 |             if (input instanceof CharBuffer) { | 
| 65 |                 return new DocCommentScanner(this, (CharBuffer)input); | 
| 66 |             } else { | 
| 67 |                 char[] array = input.toString().toCharArray(); | 
| 68 |                 return newScanner(array, array.length); | 
| 69 |             } | 
| 70 |         } | 
| 71 |   | 
| 72 |         @Override | 
| 73 |         public Scanner newScanner(char[] input, int inputLength) { | 
| 74 |             return new DocCommentScanner(this, input, inputLength); | 
| 75 |         } | 
| 76 |     } | 
| 77 |   | 
| 78 |   | 
/** Create a doc-comment scanner that reads from a character buffer.
 *  The buffer must implement array() and compact(), and remaining()
 *  must be less than limit().
 *
 *  @param fac    the factory creating this scanner; handed to the base scanner
 *  @param buffer the input buffer; handed to the base scanner
 */
protected DocCommentScanner(Factory fac, CharBuffer buffer) {
    super(fac, buffer);
}
| 85 |   | 
/** Create a doc-comment scanner that reads from a character array.
 *  The array must have at least a single character of extra space.
 *
 *  @param fac         the factory creating this scanner; handed to the base scanner
 *  @param input       the input characters; handed to the base scanner
 *  @param inputLength the number of input characters; handed to the base scanner
 */
protected DocCommentScanner(Factory fac, char[] input, int inputLength) {
    super(fac, input, inputLength);
}
| 92 |   | 
| 93 |     /** Starting position of the comment in original source | 
| 94 |      */ | 
| 95 |     private int pos; | 
| 96 |   | 
| 97 |     /** The comment input buffer, index of next chacter to be read, | 
| 98 |      *  index of one past last character in buffer. | 
| 99 |      */ | 
| 100 |     private char[] buf; | 
| 101 |     private int bp; | 
| 102 |     private int buflen; | 
| 103 |   | 
| 104 |     /** The current character. | 
| 105 |      */ | 
| 106 |     private char ch; | 
| 107 |   | 
| 108 |     /** The column number position of the current character. | 
| 109 |      */ | 
| 110 |     private int col; | 
| 111 |   | 
| 112 |     /** The buffer index of the last converted Unicode character | 
| 113 |      */ | 
| 114 |     private int unicodeConversionBp = 0; | 
| 115 |   | 
| 116 |     /** | 
| 117 |      * Buffer for doc comment. | 
| 118 |      */ | 
| 119 |     private char[] docCommentBuffer = new char[1024]; | 
| 120 |   | 
| 121 |     /** | 
| 122 |      * Number of characters in doc comment buffer. | 
| 123 |      */ | 
| 124 |     private int docCommentCount; | 
| 125 |   | 
| 126 |     /** | 
| 127 |      * Translated and stripped contents of doc comment | 
| 128 |      */ | 
| 129 |     private String docComment = null; | 
| 130 |   | 
| 131 |   | 
| 132 |     /** Unconditionally expand the comment buffer. | 
| 133 |      */ | 
| 134 |     private void expandCommentBuffer() { | 
| 135 |         char[] newBuffer = new char[docCommentBuffer.length * 2]; | 
| 136 |         System.arraycopy(docCommentBuffer, 0, newBuffer, | 
| 137 |                          0, docCommentBuffer.length); | 
| 138 |         docCommentBuffer = newBuffer; | 
| 139 |     } | 
| 140 |   | 
| 141 |     /** Convert an ASCII digit from its base (8, 10, or 16) | 
| 142 |      *  to its value. | 
| 143 |      */ | 
| 144 |     private int digit(int base) { | 
| 145 |         char c = ch; | 
| 146 |         int result = Character.digit(c, base); | 
| 147 |         if (result >= 0 && c > 0x7f) { | 
| 148 |             ch = "0123456789abcdef".charAt(result); | 
| 149 |         } | 
| 150 |         return result; | 
| 151 |     } | 
| 152 |   | 
| 153 |     /** Convert Unicode escape; bp points to initial '\' character | 
| 154 |      *  (Spec 3.3). | 
| 155 |      */ | 
| 156 |     private void convertUnicode() { | 
| 157 |         if (ch == '\\' && unicodeConversionBp != bp) { | 
| 158 |             bp++; ch = buf[bp]; col++; | 
| 159 |             if (ch == 'u') { | 
| 160 |                 do { | 
| 161 |                     bp++; ch = buf[bp]; col++; | 
| 162 |                 } while (ch == 'u'); | 
| 163 |                 int limit = bp + 3; | 
| 164 |                 if (limit < buflen) { | 
| 165 |                     int d = digit(16); | 
| 166 |                     int code = d; | 
| 167 |                     while (bp < limit && d >= 0) { | 
| 168 |                         bp++; ch = buf[bp]; col++; | 
| 169 |                         d = digit(16); | 
| 170 |                         code = (code << 4) + d; | 
| 171 |                     } | 
| 172 |                     if (d >= 0) { | 
| 173 |                         ch = (char)code; | 
| 174 |                         unicodeConversionBp = bp; | 
| 175 |                         return; | 
| 176 |                     } | 
| 177 |                 } | 
| 178 |                 // "illegal.Unicode.esc", reported by base scanner | 
| 179 |             } else { | 
| 180 |                 bp--; | 
| 181 |                 ch = '\\'; | 
| 182 |                 col--; | 
| 183 |             } | 
| 184 |         } | 
| 185 |     } | 
| 186 |   | 
| 187 |   | 
| 188 |     /** Read next character. | 
| 189 |      */ | 
| 190 |     private void scanChar() { | 
| 191 |         bp++; | 
| 192 |         ch = buf[bp]; | 
| 193 |         switch (ch) { | 
| 194 |         case '\r': // return | 
| 195 |             col = 0; | 
| 196 |             break; | 
| 197 |         case '\n': // newline | 
| 198 |             if (bp == 0 || buf[bp-1] != '\r') { | 
| 199 |                 col = 0; | 
| 200 |             } | 
| 201 |             break; | 
| 202 |         case '\t': // tab | 
| 203 |             col = (col / TabInc * TabInc) + TabInc; | 
| 204 |             break; | 
| 205 |         case '\\': // possible Unicode | 
| 206 |             col++; | 
| 207 |             convertUnicode(); | 
| 208 |             break; | 
| 209 |         default: | 
| 210 |             col++; | 
| 211 |             break; | 
| 212 |         } | 
| 213 |     } | 
| 214 |   | 
| 215 |     /** | 
| 216 |      * Read next character in doc comment, skipping over double '\' characters. | 
| 217 |      * If a double '\' is skipped, put in the buffer and update buffer count. | 
| 218 |      */ | 
| 219 |     private void scanDocCommentChar() { | 
| 220 |         scanChar(); | 
| 221 |         if (ch == '\\') { | 
| 222 |             if (buf[bp+1] == '\\' && unicodeConversionBp != bp) { | 
| 223 |                 if (docCommentCount == docCommentBuffer.length) | 
| 224 |                     expandCommentBuffer(); | 
| 225 |                 docCommentBuffer[docCommentCount++] = ch; | 
| 226 |                 bp++; col++; | 
| 227 |             } else { | 
| 228 |                 convertUnicode(); | 
| 229 |             } | 
| 230 |         } | 
| 231 |     } | 
| 232 |   | 
| 233 |     /* Reset doc comment before reading each new token | 
| 234 |      */ | 
| 235 |     public void nextToken() { | 
| 236 |         docComment = null; | 
| 237 |         super.nextToken(); | 
| 238 |     } | 
| 239 |   | 
| 240 |     /** | 
| 241 |      * Returns the documentation string of the current token. | 
| 242 |      */ | 
| 243 |     public String docComment() { | 
| 244 |         return docComment; | 
| 245 |     } | 
| 246 |   | 
| 247 |     /** | 
| 248 |      * Process a doc comment and make the string content available. | 
| 249 |      * Strips leading whitespace and stars. | 
| 250 |      */ | 
| 251 |     @SuppressWarnings("fallthrough") | 
| 252 |     protected void processComment(CommentStyle style) { | 
| 253 |         if (style != CommentStyle.JAVADOC) { | 
| 254 |             return; | 
| 255 |         } | 
| 256 |   | 
| 257 |         pos = pos(); | 
| 258 |         buf = getRawCharacters(pos, endPos()); | 
| 259 |         buflen = buf.length; | 
| 260 |         bp = 0; | 
| 261 |         col = 0; | 
| 262 |   | 
| 263 |         docCommentCount = 0; | 
| 264 |   | 
| 265 |         boolean firstLine = true; | 
| 266 |   | 
| 267 |         // Skip over first slash | 
| 268 |         scanDocCommentChar(); | 
| 269 |         // Skip over first star | 
| 270 |         scanDocCommentChar(); | 
| 271 |   | 
| 272 |         // consume any number of stars | 
| 273 |         while (bp < buflen && ch == '*') { | 
| 274 |             scanDocCommentChar(); | 
| 275 |         } | 
| 276 |         // is the comment in the form /**/, /***/, /****/, etc. ? | 
| 277 |         if (bp < buflen && ch == '/') { | 
| 278 |             docComment = ""; | 
| 279 |             return; | 
| 280 |         } | 
| 281 |   | 
| 282 |         // skip a newline on the first line of the comment. | 
| 283 |         if (bp < buflen) { | 
| 284 |             if (ch == LF) { | 
| 285 |                 scanDocCommentChar(); | 
| 286 |                 firstLine = false; | 
| 287 |             } else if (ch == CR) { | 
| 288 |                 scanDocCommentChar(); | 
| 289 |                 if (ch == LF) { | 
| 290 |                     scanDocCommentChar(); | 
| 291 |                     firstLine = false; | 
| 292 |                 } | 
| 293 |             } | 
| 294 |         } | 
| 295 |   | 
| 296 |     outerLoop: | 
| 297 |   | 
| 298 |         // The outerLoop processes the doc comment, looping once | 
| 299 |         // for each line.  For each line, it first strips off | 
| 300 |         // whitespace, then it consumes any stars, then it | 
| 301 |         // puts the rest of the line into our buffer. | 
| 302 |         while (bp < buflen) { | 
| 303 |   | 
| 304 |             // The wsLoop consumes whitespace from the beginning | 
| 305 |             // of each line. | 
| 306 |         wsLoop: | 
| 307 |   | 
| 308 |             while (bp < buflen) { | 
| 309 |                 switch(ch) { | 
| 310 |                 case ' ': | 
| 311 |                     scanDocCommentChar(); | 
| 312 |                     break; | 
| 313 |                 case '\t': | 
| 314 |                     col = ((col - 1) / TabInc * TabInc) + TabInc; | 
| 315 |                     scanDocCommentChar(); | 
| 316 |                     break; | 
| 317 |                 case FF: | 
| 318 |                     col = 0; | 
| 319 |                     scanDocCommentChar(); | 
| 320 |                     break; | 
| 321 | // Treat newline at beginning of line (blank line, no star) | 
| 322 | // as comment text.  Old Javadoc compatibility requires this. | 
| 323 | /*---------------------------------* | 
| 324 |                 case CR: // (Spec 3.4) | 
| 325 |                     scanDocCommentChar(); | 
| 326 |                     if (ch == LF) { | 
| 327 |                         col = 0; | 
| 328 |                         scanDocCommentChar(); | 
| 329 |                     } | 
| 330 |                     break; | 
| 331 |                 case LF: // (Spec 3.4) | 
| 332 |                     scanDocCommentChar(); | 
| 333 |                     break; | 
| 334 | *---------------------------------*/ | 
| 335 |                 default: | 
| 336 |                     // we've seen something that isn't whitespace; | 
| 337 |                     // jump out. | 
| 338 |                     break wsLoop; | 
| 339 |                 } | 
| 340 |             } | 
| 341 |   | 
| 342 |             // Are there stars here?  If so, consume them all | 
| 343 |             // and check for the end of comment. | 
| 344 |             if (ch == '*') { | 
| 345 |                 // skip all of the stars | 
| 346 |                 do { | 
| 347 |                     scanDocCommentChar(); | 
| 348 |                 } while (ch == '*'); | 
| 349 |   | 
| 350 |                 // check for the closing slash. | 
| 351 |                 if (ch == '/') { | 
| 352 |                     // We're done with the doc comment | 
| 353 |                     // scanChar() and breakout. | 
| 354 |                     break outerLoop; | 
| 355 |                 } | 
| 356 |             } else if (! firstLine) { | 
| 357 |                 //The current line does not begin with a '*' so we will indent it. | 
| 358 |                 for (int i = 1; i < col; i++) { | 
| 359 |                     if (docCommentCount == docCommentBuffer.length) | 
| 360 |                         expandCommentBuffer(); | 
| 361 |                     docCommentBuffer[docCommentCount++] = ' '; | 
| 362 |                 } | 
| 363 |             } | 
| 364 |   | 
| 365 |             // The textLoop processes the rest of the characters | 
| 366 |             // on the line, adding them to our buffer. | 
| 367 |         textLoop: | 
| 368 |             while (bp < buflen) { | 
| 369 |                 switch (ch) { | 
| 370 |                 case '*': | 
| 371 |                     // Is this just a star?  Or is this the | 
| 372 |                     // end of a comment? | 
| 373 |                     scanDocCommentChar(); | 
| 374 |                     if (ch == '/') { | 
| 375 |                         // This is the end of the comment, | 
| 376 |                         // set ch and return our buffer. | 
| 377 |                         break outerLoop; | 
| 378 |                     } | 
| 379 |                     // This is just an ordinary star.  Add it to | 
| 380 |                     // the buffer. | 
| 381 |                     if (docCommentCount == docCommentBuffer.length) | 
| 382 |                         expandCommentBuffer(); | 
| 383 |                     docCommentBuffer[docCommentCount++] = '*'; | 
| 384 |                     break; | 
| 385 |                 case ' ': | 
| 386 |                 case '\t': | 
| 387 |                     if (docCommentCount == docCommentBuffer.length) | 
| 388 |                         expandCommentBuffer(); | 
| 389 |                     docCommentBuffer[docCommentCount++] = ch; | 
| 390 |                     scanDocCommentChar(); | 
| 391 |                     break; | 
| 392 |                 case FF: | 
| 393 |                     scanDocCommentChar(); | 
| 394 |                     break textLoop; // treat as end of line | 
| 395 |                 case CR: // (Spec 3.4) | 
| 396 |                     scanDocCommentChar(); | 
| 397 |                     if (ch != LF) { | 
| 398 |                         // Canonicalize CR-only line terminator to LF | 
| 399 |                         if (docCommentCount == docCommentBuffer.length) | 
| 400 |                             expandCommentBuffer(); | 
| 401 |                         docCommentBuffer[docCommentCount++] = (char)LF; | 
| 402 |                         break textLoop; | 
| 403 |                     } | 
| 404 |                     /* fall through to LF case */ | 
| 405 |                 case LF: // (Spec 3.4) | 
| 406 |                     // We've seen a newline.  Add it to our | 
| 407 |                     // buffer and break out of this loop, | 
| 408 |                     // starting fresh on a new line. | 
| 409 |                     if (docCommentCount == docCommentBuffer.length) | 
| 410 |                         expandCommentBuffer(); | 
| 411 |                     docCommentBuffer[docCommentCount++] = ch; | 
| 412 |                     scanDocCommentChar(); | 
| 413 |                     break textLoop; | 
| 414 |                 default: | 
| 415 |                     // Add the character to our buffer. | 
| 416 |                     if (docCommentCount == docCommentBuffer.length) | 
| 417 |                         expandCommentBuffer(); | 
| 418 |                     docCommentBuffer[docCommentCount++] = ch; | 
| 419 |                     scanDocCommentChar(); | 
| 420 |                 } | 
| 421 |             } // end textLoop | 
| 422 |             firstLine = false; | 
| 423 |         } // end outerLoop | 
| 424 |   | 
| 425 |         if (docCommentCount > 0) { | 
| 426 |             int i = docCommentCount - 1; | 
| 427 |         trailLoop: | 
| 428 |             while (i > -1) { | 
| 429 |                 switch (docCommentBuffer[i]) { | 
| 430 |                 case '*': | 
| 431 |                     i--; | 
| 432 |                     break; | 
| 433 |                 default: | 
| 434 |                     break trailLoop; | 
| 435 |                 } | 
| 436 |             } | 
| 437 |             docCommentCount = i + 1; | 
| 438 |   | 
| 439 |             // Store the text of the doc comment | 
| 440 |             docComment = new String(docCommentBuffer, 0 , docCommentCount); | 
| 441 |         } else { | 
| 442 |             docComment = ""; | 
| 443 |         } | 
| 444 |     } | 
| 445 |   | 
| 446 |     /** Build a map for translating between line numbers and | 
| 447 |      * positions in the input. | 
| 448 |      * | 
| 449 |      * @return a LineMap */ | 
| 450 |     public Position.LineMap getLineMap() { | 
| 451 |         char[] buf = getRawCharacters(); | 
| 452 |         return Position.makeLineMap(buf, buf.length, true); | 
| 453 |     } | 
| 454 | } |