001 /*
002 * Copyright 2004-2008 Sun Microsystems, Inc. All Rights Reserved.
003 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004 *
005 * This code is free software; you can redistribute it and/or modify it
006 * under the terms of the GNU General Public License version 2 only, as
007 * published by the Free Software Foundation. Sun designates this
008 * particular file as subject to the "Classpath" exception as provided
009 * by Sun in the LICENSE file that accompanied this code.
010 *
011 * This code is distributed in the hope that it will be useful, but WITHOUT
012 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
013 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
014 * version 2 for more details (a copy is included in the LICENSE file that
015 * accompanied this code).
016 *
017 * You should have received a copy of the GNU General Public License version
018 * 2 along with this work; if not, write to the Free Software Foundation,
019 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
020 *
021 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
022 * CA 95054 USA or visit www.sun.com if you need additional information or
023 * have any questions.
024 */
025
026 package com.sun.tools.javac.parser;
027
028 import java.nio.*;
029
030 import com.sun.tools.javac.util.*;
031 import static com.sun.tools.javac.util.LayoutCharacters.*;
032
033 /** An extension to the base lexical analyzer that captures
034 * and processes the contents of doc comments. It does so by
035 * translating Unicode escape sequences and by stripping the
036 * leading whitespace and stars from each line of the comment.
037 *
038 * <p><b>This is NOT part of any API supported by Sun Microsystems. If
039 * you write code that depends on this, you do so at your own risk.
040 * This code and its internal interfaces are subject to change or
041 * deletion without notice.</b>
042 */
043 public class DocCommentScanner extends Scanner {
044
045 /** A factory for creating scanners. */
046 public static class Factory extends Scanner.Factory {
047
048 public static void preRegister(final Context context) {
049 context.put(scannerFactoryKey, new Context.Factory<Scanner.Factory>() {
050 public Factory make() {
051 return new Factory(context);
052 }
053 });
054 }
055
056 /** Create a new scanner factory. */
057 protected Factory(Context context) {
058 super(context);
059 }
060
061 @Override
062 public Scanner newScanner(CharSequence input) {
063 if (input instanceof CharBuffer) {
064 return new DocCommentScanner(this, (CharBuffer)input);
065 } else {
066 char[] array = input.toString().toCharArray();
067 return newScanner(array, array.length);
068 }
069 }
070
071 @Override
072 public Scanner newScanner(char[] input, int inputLength) {
073 return new DocCommentScanner(this, input, inputLength);
074 }
075 }
076
077
/**
 * Creates a scanner that reads from the given character buffer. The buffer
 * is passed to the base scanner unchanged; as stated by the original
 * contract, it must implement array() and compact(), and remaining() must
 * be less than limit().
 *
 * @param fac    the factory creating this scanner
 * @param buffer the input buffer
 */
protected DocCommentScanner(Factory fac, CharBuffer buffer) {
    super(fac, buffer);
}
084
/**
 * Creates a scanner that reads from the given character array. As stated
 * by the original contract, the array must have room for at least one
 * character beyond the input.
 *
 * @param fac         the factory creating this scanner
 * @param input       the array holding the input characters
 * @param inputLength the number of input characters in {@code input}
 */
protected DocCommentScanner(Factory fac, char[] input, int inputLength) {
    super(fac, input, inputLength);
}
091
092 /** Starting position of the comment in original source
093 */
094 private int pos;
095
096 /** The comment input buffer, index of next chacter to be read,
097 * index of one past last character in buffer.
098 */
099 private char[] buf;
100 private int bp;
101 private int buflen;
102
103 /** The current character.
104 */
105 private char ch;
106
107 /** The column number position of the current character.
108 */
109 private int col;
110
111 /** The buffer index of the last converted Unicode character
112 */
113 private int unicodeConversionBp = 0;
114
115 /**
116 * Buffer for doc comment.
117 */
118 private char[] docCommentBuffer = new char[1024];
119
120 /**
121 * Number of characters in doc comment buffer.
122 */
123 private int docCommentCount;
124
125 /**
126 * Translated and stripped contents of doc comment
127 */
128 private String docComment = null;
129
130
131 /** Unconditionally expand the comment buffer.
132 */
133 private void expandCommentBuffer() {
134 char[] newBuffer = new char[docCommentBuffer.length * 2];
135 System.arraycopy(docCommentBuffer, 0, newBuffer,
136 0, docCommentBuffer.length);
137 docCommentBuffer = newBuffer;
138 }
139
140 /** Convert an ASCII digit from its base (8, 10, or 16)
141 * to its value.
142 */
143 private int digit(int base) {
144 char c = ch;
145 int result = Character.digit(c, base);
146 if (result >= 0 && c > 0x7f) {
147 ch = "0123456789abcdef".charAt(result);
148 }
149 return result;
150 }
151
152 /** Convert Unicode escape; bp points to initial '\' character
153 * (Spec 3.3).
154 */
155 private void convertUnicode() {
156 if (ch == '\\' && unicodeConversionBp != bp) {
157 bp++; ch = buf[bp]; col++;
158 if (ch == 'u') {
159 do {
160 bp++; ch = buf[bp]; col++;
161 } while (ch == 'u');
162 int limit = bp + 3;
163 if (limit < buflen) {
164 int d = digit(16);
165 int code = d;
166 while (bp < limit && d >= 0) {
167 bp++; ch = buf[bp]; col++;
168 d = digit(16);
169 code = (code << 4) + d;
170 }
171 if (d >= 0) {
172 ch = (char)code;
173 unicodeConversionBp = bp;
174 return;
175 }
176 }
177 // "illegal.Unicode.esc", reported by base scanner
178 } else {
179 bp--;
180 ch = '\\';
181 col--;
182 }
183 }
184 }
185
186
187 /** Read next character.
188 */
189 private void scanChar() {
190 bp++;
191 ch = buf[bp];
192 switch (ch) {
193 case '\r': // return
194 col = 0;
195 break;
196 case '\n': // newline
197 if (bp == 0 || buf[bp-1] != '\r') {
198 col = 0;
199 }
200 break;
201 case '\t': // tab
202 col = (col / TabInc * TabInc) + TabInc;
203 break;
204 case '\\': // possible Unicode
205 col++;
206 convertUnicode();
207 break;
208 default:
209 col++;
210 break;
211 }
212 }
213
214 /**
215 * Read next character in doc comment, skipping over double '\' characters.
216 * If a double '\' is skipped, put in the buffer and update buffer count.
217 */
218 private void scanDocCommentChar() {
219 scanChar();
220 if (ch == '\\') {
221 if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
222 if (docCommentCount == docCommentBuffer.length)
223 expandCommentBuffer();
224 docCommentBuffer[docCommentCount++] = ch;
225 bp++; col++;
226 } else {
227 convertUnicode();
228 }
229 }
230 }
231
232 /* Reset doc comment before reading each new token
233 */
234 public void nextToken() {
235 docComment = null;
236 super.nextToken();
237 }
238
239 /**
240 * Returns the documentation string of the current token.
241 */
242 public String docComment() {
243 return docComment;
244 }
245
246 /**
247 * Process a doc comment and make the string content available.
248 * Strips leading whitespace and stars.
249 */
250 @SuppressWarnings("fallthrough")
251 protected void processComment(CommentStyle style) {
252 if (style != CommentStyle.JAVADOC) {
253 return;
254 }
255
256 pos = pos();
257 buf = getRawCharacters(pos, endPos());
258 buflen = buf.length;
259 bp = 0;
260 col = 0;
261
262 docCommentCount = 0;
263
264 boolean firstLine = true;
265
266 // Skip over first slash
267 scanDocCommentChar();
268 // Skip over first star
269 scanDocCommentChar();
270
271 // consume any number of stars
272 while (bp < buflen && ch == '*') {
273 scanDocCommentChar();
274 }
275 // is the comment in the form /**/, /***/, /****/, etc. ?
276 if (bp < buflen && ch == '/') {
277 docComment = "";
278 return;
279 }
280
281 // skip a newline on the first line of the comment.
282 if (bp < buflen) {
283 if (ch == LF) {
284 scanDocCommentChar();
285 firstLine = false;
286 } else if (ch == CR) {
287 scanDocCommentChar();
288 if (ch == LF) {
289 scanDocCommentChar();
290 firstLine = false;
291 }
292 }
293 }
294
295 outerLoop:
296
297 // The outerLoop processes the doc comment, looping once
298 // for each line. For each line, it first strips off
299 // whitespace, then it consumes any stars, then it
300 // puts the rest of the line into our buffer.
301 while (bp < buflen) {
302
303 // The wsLoop consumes whitespace from the beginning
304 // of each line.
305 wsLoop:
306
307 while (bp < buflen) {
308 switch(ch) {
309 case ' ':
310 scanDocCommentChar();
311 break;
312 case '\t':
313 col = ((col - 1) / TabInc * TabInc) + TabInc;
314 scanDocCommentChar();
315 break;
316 case FF:
317 col = 0;
318 scanDocCommentChar();
319 break;
320 // Treat newline at beginning of line (blank line, no star)
321 // as comment text. Old Javadoc compatibility requires this.
322 /*---------------------------------*
323 case CR: // (Spec 3.4)
324 scanDocCommentChar();
325 if (ch == LF) {
326 col = 0;
327 scanDocCommentChar();
328 }
329 break;
330 case LF: // (Spec 3.4)
331 scanDocCommentChar();
332 break;
333 *---------------------------------*/
334 default:
335 // we've seen something that isn't whitespace;
336 // jump out.
337 break wsLoop;
338 }
339 }
340
341 // Are there stars here? If so, consume them all
342 // and check for the end of comment.
343 if (ch == '*') {
344 // skip all of the stars
345 do {
346 scanDocCommentChar();
347 } while (ch == '*');
348
349 // check for the closing slash.
350 if (ch == '/') {
351 // We're done with the doc comment
352 // scanChar() and breakout.
353 break outerLoop;
354 }
355 } else if (! firstLine) {
356 //The current line does not begin with a '*' so we will indent it.
357 for (int i = 1; i < col; i++) {
358 if (docCommentCount == docCommentBuffer.length)
359 expandCommentBuffer();
360 docCommentBuffer[docCommentCount++] = ' ';
361 }
362 }
363
364 // The textLoop processes the rest of the characters
365 // on the line, adding them to our buffer.
366 textLoop:
367 while (bp < buflen) {
368 switch (ch) {
369 case '*':
370 // Is this just a star? Or is this the
371 // end of a comment?
372 scanDocCommentChar();
373 if (ch == '/') {
374 // This is the end of the comment,
375 // set ch and return our buffer.
376 break outerLoop;
377 }
378 // This is just an ordinary star. Add it to
379 // the buffer.
380 if (docCommentCount == docCommentBuffer.length)
381 expandCommentBuffer();
382 docCommentBuffer[docCommentCount++] = '*';
383 break;
384 case ' ':
385 case '\t':
386 if (docCommentCount == docCommentBuffer.length)
387 expandCommentBuffer();
388 docCommentBuffer[docCommentCount++] = ch;
389 scanDocCommentChar();
390 break;
391 case FF:
392 scanDocCommentChar();
393 break textLoop; // treat as end of line
394 case CR: // (Spec 3.4)
395 scanDocCommentChar();
396 if (ch != LF) {
397 // Canonicalize CR-only line terminator to LF
398 if (docCommentCount == docCommentBuffer.length)
399 expandCommentBuffer();
400 docCommentBuffer[docCommentCount++] = (char)LF;
401 break textLoop;
402 }
403 /* fall through to LF case */
404 case LF: // (Spec 3.4)
405 // We've seen a newline. Add it to our
406 // buffer and break out of this loop,
407 // starting fresh on a new line.
408 if (docCommentCount == docCommentBuffer.length)
409 expandCommentBuffer();
410 docCommentBuffer[docCommentCount++] = ch;
411 scanDocCommentChar();
412 break textLoop;
413 default:
414 // Add the character to our buffer.
415 if (docCommentCount == docCommentBuffer.length)
416 expandCommentBuffer();
417 docCommentBuffer[docCommentCount++] = ch;
418 scanDocCommentChar();
419 }
420 } // end textLoop
421 firstLine = false;
422 } // end outerLoop
423
424 if (docCommentCount > 0) {
425 int i = docCommentCount - 1;
426 trailLoop:
427 while (i > -1) {
428 switch (docCommentBuffer[i]) {
429 case '*':
430 i--;
431 break;
432 default:
433 break trailLoop;
434 }
435 }
436 docCommentCount = i + 1;
437
438 // Store the text of the doc comment
439 docComment = new String(docCommentBuffer, 0 , docCommentCount);
440 } else {
441 docComment = "";
442 }
443 }
444
445 /** Build a map for translating between line numbers and
446 * positions in the input.
447 *
448 * @return a LineMap */
449 public Position.LineMap getLineMap() {
450 char[] buf = getRawCharacters();
451 return Position.makeLineMap(buf, buf.length, true);
452 }
453 }