1 | /* |
2 | * Copyright 2004-2006 Sun Microsystems, Inc. All Rights Reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. Sun designates this |
8 | * particular file as subject to the "Classpath" exception as provided |
9 | * by Sun in the LICENSE file that accompanied this code. |
10 | * |
11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | * version 2 for more details (a copy is included in the LICENSE file that |
15 | * accompanied this code). |
16 | * |
17 | * You should have received a copy of the GNU General Public License version |
18 | * 2 along with this work; if not, write to the Free Software Foundation, |
19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
20 | * |
21 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
22 | * CA 95054 USA or visit www.sun.com if you need additional information or |
23 | * have any questions. |
24 | */ |
25 | |
26 | package com.sun.tools.javac.parser; |
27 | |
28 | import java.io.*; |
29 | import java.nio.*; |
30 | |
31 | import com.sun.tools.javac.util.*; |
32 | import static com.sun.tools.javac.util.LayoutCharacters.*; |
33 | |
34 | /** An extension to the base lexical analyzer that captures |
35 | * and processes the contents of doc comments. It does so by |
36 | * translating Unicode escape sequences and by stripping the |
37 | * leading whitespace and stars from each line of the comment. |
38 | * |
39 | * <p><b>This is NOT part of any API supported by Sun Microsystems. If |
40 | * you write code that depends on this, you do so at your own risk. |
41 | * This code and its internal interfaces are subject to change or |
42 | * deletion without notice.</b> |
43 | */ |
44 | public class DocCommentScanner extends Scanner { |
45 | |
46 | /** A factory for creating scanners. */ |
47 | public static class Factory extends Scanner.Factory { |
48 | |
49 | public static void preRegister(final Context context) { |
50 | context.put(scannerFactoryKey, new Context.Factory<Scanner.Factory>() { |
51 | public Factory make() { |
52 | return new Factory(context); |
53 | } |
54 | }); |
55 | } |
56 | |
57 | /** Create a new scanner factory. */ |
58 | protected Factory(Context context) { |
59 | super(context); |
60 | } |
61 | |
62 | @Override |
63 | public Scanner newScanner(CharSequence input) { |
64 | if (input instanceof CharBuffer) { |
65 | return new DocCommentScanner(this, (CharBuffer)input); |
66 | } else { |
67 | char[] array = input.toString().toCharArray(); |
68 | return newScanner(array, array.length); |
69 | } |
70 | } |
71 | |
72 | @Override |
73 | public Scanner newScanner(char[] input, int inputLength) { |
74 | return new DocCommentScanner(this, input, inputLength); |
75 | } |
76 | } |
77 | |
78 | |
/** Create a scanner from the input buffer.  buffer must implement
 *  array() and compact(), and remaining() must be less than limit().
 *  All set-up is delegated to the superclass constructor.
 *
 *  @param fac    the factory creating this scanner
 *  @param buffer the characters to scan
 */
protected DocCommentScanner(Factory fac, CharBuffer buffer) {
    super(fac, buffer);
}
85 | |
/** Create a scanner from the input array.  The array must have at
 *  least a single character of extra space.
 *  All set-up is delegated to the superclass constructor.
 *
 *  @param fac         the factory creating this scanner
 *  @param input       the array holding the characters to scan
 *  @param inputLength the number of valid characters in input
 */
protected DocCommentScanner(Factory fac, char[] input, int inputLength) {
    super(fac, input, inputLength);
}
92 | |
/** Starting position of the comment in original source; assigned
 *  from pos() each time processComment handles a doc comment.
 */
private int pos;

/** The comment input buffer (the raw characters of the comment
 *  currently being processed), the index in that buffer of the
 *  current character (scanChar increments it before reading), and
 *  the index of one past the last character in the buffer.
 */
private char[] buf;
private int bp;
private int buflen;

/** The current character, i.e. the character most recently read
 *  from buf, possibly replaced by the value of a Unicode escape.
 */
private char ch;

/** The column number position of the current character.
 */
private int col;

/** The buffer index of the last converted Unicode character.
 *  It is compared against bp so that a backslash which was itself
 *  produced by a Unicode escape is not treated as the start of
 *  another escape.
 */
private int unicodeConversionBp = 0;

/**
 * Buffer for doc comment. Starts at 1024 characters and is doubled
 * by expandCommentBuffer() whenever it is full.
 */
private char[] docCommentBuffer = new char[1024];

/**
 * Number of characters in doc comment buffer.
 */
private int docCommentCount;

/**
 * Translated and stripped contents of doc comment; null when no doc
 * comment has been processed since the last call to nextToken().
 */
private String docComment = null;
130 | |
131 | |
132 | /** Unconditionally expand the comment buffer. |
133 | */ |
134 | private void expandCommentBuffer() { |
135 | char[] newBuffer = new char[docCommentBuffer.length * 2]; |
136 | System.arraycopy(docCommentBuffer, 0, newBuffer, |
137 | 0, docCommentBuffer.length); |
138 | docCommentBuffer = newBuffer; |
139 | } |
140 | |
141 | /** Convert an ASCII digit from its base (8, 10, or 16) |
142 | * to its value. |
143 | */ |
144 | private int digit(int base) { |
145 | char c = ch; |
146 | int result = Character.digit(c, base); |
147 | if (result >= 0 && c > 0x7f) { |
148 | ch = "0123456789abcdef".charAt(result); |
149 | } |
150 | return result; |
151 | } |
152 | |
153 | /** Convert Unicode escape; bp points to initial '\' character |
154 | * (Spec 3.3). |
155 | */ |
156 | private void convertUnicode() { |
157 | if (ch == '\\' && unicodeConversionBp != bp) { |
158 | bp++; ch = buf[bp]; col++; |
159 | if (ch == 'u') { |
160 | do { |
161 | bp++; ch = buf[bp]; col++; |
162 | } while (ch == 'u'); |
163 | int limit = bp + 3; |
164 | if (limit < buflen) { |
165 | int d = digit(16); |
166 | int code = d; |
167 | while (bp < limit && d >= 0) { |
168 | bp++; ch = buf[bp]; col++; |
169 | d = digit(16); |
170 | code = (code << 4) + d; |
171 | } |
172 | if (d >= 0) { |
173 | ch = (char)code; |
174 | unicodeConversionBp = bp; |
175 | return; |
176 | } |
177 | } |
178 | // "illegal.Unicode.esc", reported by base scanner |
179 | } else { |
180 | bp--; |
181 | ch = '\\'; |
182 | col--; |
183 | } |
184 | } |
185 | } |
186 | |
187 | |
188 | /** Read next character. |
189 | */ |
190 | private void scanChar() { |
191 | bp++; |
192 | ch = buf[bp]; |
193 | switch (ch) { |
194 | case '\r': // return |
195 | col = 0; |
196 | break; |
197 | case '\n': // newline |
198 | if (bp == 0 || buf[bp-1] != '\r') { |
199 | col = 0; |
200 | } |
201 | break; |
202 | case '\t': // tab |
203 | col = (col / TabInc * TabInc) + TabInc; |
204 | break; |
205 | case '\\': // possible Unicode |
206 | col++; |
207 | convertUnicode(); |
208 | break; |
209 | default: |
210 | col++; |
211 | break; |
212 | } |
213 | } |
214 | |
215 | /** |
216 | * Read next character in doc comment, skipping over double '\' characters. |
217 | * If a double '\' is skipped, put in the buffer and update buffer count. |
218 | */ |
219 | private void scanDocCommentChar() { |
220 | scanChar(); |
221 | if (ch == '\\') { |
222 | if (buf[bp+1] == '\\' && unicodeConversionBp != bp) { |
223 | if (docCommentCount == docCommentBuffer.length) |
224 | expandCommentBuffer(); |
225 | docCommentBuffer[docCommentCount++] = ch; |
226 | bp++; col++; |
227 | } else { |
228 | convertUnicode(); |
229 | } |
230 | } |
231 | } |
232 | |
/** Reset doc comment before reading each new token, then let the
 *  superclass read the token.
 */
public void nextToken() {
    // Clear first, so a value left over from an earlier token is never
    // reported for this one.  Presumably the base scanner calls
    // processComment() while scanning, which stores the new text --
    // TODO confirm against Scanner.
    docComment = null;
    super.nextToken();
}
239 | |
/**
 * Returns the documentation string of the current token.
 *
 * @return the text stored by processComment since the last call to
 *         nextToken(), or null if none has been stored
 */
public String docComment() {
    return docComment;
}
246 | |
247 | /** |
248 | * Process a doc comment and make the string content available. |
249 | * Strips leading whitespace and stars. |
250 | */ |
251 | @SuppressWarnings("fallthrough") |
252 | protected void processComment(CommentStyle style) { |
253 | if (style != CommentStyle.JAVADOC) { |
254 | return; |
255 | } |
256 | |
257 | pos = pos(); |
258 | buf = getRawCharacters(pos, endPos()); |
259 | buflen = buf.length; |
260 | bp = 0; |
261 | col = 0; |
262 | |
263 | docCommentCount = 0; |
264 | |
265 | boolean firstLine = true; |
266 | |
267 | // Skip over first slash |
268 | scanDocCommentChar(); |
269 | // Skip over first star |
270 | scanDocCommentChar(); |
271 | |
272 | // consume any number of stars |
273 | while (bp < buflen && ch == '*') { |
274 | scanDocCommentChar(); |
275 | } |
276 | // is the comment in the form /**/, /***/, /****/, etc. ? |
277 | if (bp < buflen && ch == '/') { |
278 | docComment = ""; |
279 | return; |
280 | } |
281 | |
282 | // skip a newline on the first line of the comment. |
283 | if (bp < buflen) { |
284 | if (ch == LF) { |
285 | scanDocCommentChar(); |
286 | firstLine = false; |
287 | } else if (ch == CR) { |
288 | scanDocCommentChar(); |
289 | if (ch == LF) { |
290 | scanDocCommentChar(); |
291 | firstLine = false; |
292 | } |
293 | } |
294 | } |
295 | |
296 | outerLoop: |
297 | |
298 | // The outerLoop processes the doc comment, looping once |
299 | // for each line. For each line, it first strips off |
300 | // whitespace, then it consumes any stars, then it |
301 | // puts the rest of the line into our buffer. |
302 | while (bp < buflen) { |
303 | |
304 | // The wsLoop consumes whitespace from the beginning |
305 | // of each line. |
306 | wsLoop: |
307 | |
308 | while (bp < buflen) { |
309 | switch(ch) { |
310 | case ' ': |
311 | scanDocCommentChar(); |
312 | break; |
313 | case '\t': |
314 | col = ((col - 1) / TabInc * TabInc) + TabInc; |
315 | scanDocCommentChar(); |
316 | break; |
317 | case FF: |
318 | col = 0; |
319 | scanDocCommentChar(); |
320 | break; |
321 | // Treat newline at beginning of line (blank line, no star) |
322 | // as comment text. Old Javadoc compatibility requires this. |
323 | /*---------------------------------* |
324 | case CR: // (Spec 3.4) |
325 | scanDocCommentChar(); |
326 | if (ch == LF) { |
327 | col = 0; |
328 | scanDocCommentChar(); |
329 | } |
330 | break; |
331 | case LF: // (Spec 3.4) |
332 | scanDocCommentChar(); |
333 | break; |
334 | *---------------------------------*/ |
335 | default: |
336 | // we've seen something that isn't whitespace; |
337 | // jump out. |
338 | break wsLoop; |
339 | } |
340 | } |
341 | |
342 | // Are there stars here? If so, consume them all |
343 | // and check for the end of comment. |
344 | if (ch == '*') { |
345 | // skip all of the stars |
346 | do { |
347 | scanDocCommentChar(); |
348 | } while (ch == '*'); |
349 | |
350 | // check for the closing slash. |
351 | if (ch == '/') { |
352 | // We're done with the doc comment |
353 | // scanChar() and breakout. |
354 | break outerLoop; |
355 | } |
356 | } else if (! firstLine) { |
357 | //The current line does not begin with a '*' so we will indent it. |
358 | for (int i = 1; i < col; i++) { |
359 | if (docCommentCount == docCommentBuffer.length) |
360 | expandCommentBuffer(); |
361 | docCommentBuffer[docCommentCount++] = ' '; |
362 | } |
363 | } |
364 | |
365 | // The textLoop processes the rest of the characters |
366 | // on the line, adding them to our buffer. |
367 | textLoop: |
368 | while (bp < buflen) { |
369 | switch (ch) { |
370 | case '*': |
371 | // Is this just a star? Or is this the |
372 | // end of a comment? |
373 | scanDocCommentChar(); |
374 | if (ch == '/') { |
375 | // This is the end of the comment, |
376 | // set ch and return our buffer. |
377 | break outerLoop; |
378 | } |
379 | // This is just an ordinary star. Add it to |
380 | // the buffer. |
381 | if (docCommentCount == docCommentBuffer.length) |
382 | expandCommentBuffer(); |
383 | docCommentBuffer[docCommentCount++] = '*'; |
384 | break; |
385 | case ' ': |
386 | case '\t': |
387 | if (docCommentCount == docCommentBuffer.length) |
388 | expandCommentBuffer(); |
389 | docCommentBuffer[docCommentCount++] = ch; |
390 | scanDocCommentChar(); |
391 | break; |
392 | case FF: |
393 | scanDocCommentChar(); |
394 | break textLoop; // treat as end of line |
395 | case CR: // (Spec 3.4) |
396 | scanDocCommentChar(); |
397 | if (ch != LF) { |
398 | // Canonicalize CR-only line terminator to LF |
399 | if (docCommentCount == docCommentBuffer.length) |
400 | expandCommentBuffer(); |
401 | docCommentBuffer[docCommentCount++] = (char)LF; |
402 | break textLoop; |
403 | } |
404 | /* fall through to LF case */ |
405 | case LF: // (Spec 3.4) |
406 | // We've seen a newline. Add it to our |
407 | // buffer and break out of this loop, |
408 | // starting fresh on a new line. |
409 | if (docCommentCount == docCommentBuffer.length) |
410 | expandCommentBuffer(); |
411 | docCommentBuffer[docCommentCount++] = ch; |
412 | scanDocCommentChar(); |
413 | break textLoop; |
414 | default: |
415 | // Add the character to our buffer. |
416 | if (docCommentCount == docCommentBuffer.length) |
417 | expandCommentBuffer(); |
418 | docCommentBuffer[docCommentCount++] = ch; |
419 | scanDocCommentChar(); |
420 | } |
421 | } // end textLoop |
422 | firstLine = false; |
423 | } // end outerLoop |
424 | |
425 | if (docCommentCount > 0) { |
426 | int i = docCommentCount - 1; |
427 | trailLoop: |
428 | while (i > -1) { |
429 | switch (docCommentBuffer[i]) { |
430 | case '*': |
431 | i--; |
432 | break; |
433 | default: |
434 | break trailLoop; |
435 | } |
436 | } |
437 | docCommentCount = i + 1; |
438 | |
439 | // Store the text of the doc comment |
440 | docComment = new String(docCommentBuffer, 0 , docCommentCount); |
441 | } else { |
442 | docComment = ""; |
443 | } |
444 | } |
445 | |
446 | /** Build a map for translating between line numbers and |
447 | * positions in the input. |
448 | * |
449 | * @return a LineMap */ |
450 | public Position.LineMap getLineMap() { |
451 | char[] buf = getRawCharacters(); |
452 | return Position.makeLineMap(buf, buf.length, true); |
453 | } |
454 | } |