001////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code for adherence to a set of rules.
003// Copyright (C) 2001-2017 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.javadoc;
021
022import java.util.ArrayDeque;
023import java.util.Collections;
024import java.util.Deque;
025import java.util.List;
026import java.util.Locale;
027import java.util.Set;
028import java.util.TreeSet;
029import java.util.regex.Pattern;
030import java.util.stream.Collectors;
031import java.util.stream.Stream;
032
033import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
034import com.puppycrawl.tools.checkstyle.api.DetailAST;
035import com.puppycrawl.tools.checkstyle.api.FileContents;
036import com.puppycrawl.tools.checkstyle.api.Scope;
037import com.puppycrawl.tools.checkstyle.api.TextBlock;
038import com.puppycrawl.tools.checkstyle.api.TokenTypes;
039import com.puppycrawl.tools.checkstyle.utils.CheckUtils;
040import com.puppycrawl.tools.checkstyle.utils.CommonUtils;
041import com.puppycrawl.tools.checkstyle.utils.ScopeUtils;
042
043/**
044 * Custom Checkstyle Check to validate Javadoc.
045 *
046 * @author Chris Stillwell
047 * @author Daniel Grenner
048 * @author Travis Schneeberger
049 */
050public class JavadocStyleCheck
051    extends AbstractCheck {
052
053    /** Message property key for the Unclosed HTML message. */
054    public static final String MSG_JAVADOC_MISSING = "javadoc.missing";
055
056    /** Message property key for the Unclosed HTML message. */
057    public static final String MSG_EMPTY = "javadoc.empty";
058
059    /** Message property key for the Unclosed HTML message. */
060    public static final String MSG_NO_PERIOD = "javadoc.noPeriod";
061
062    /** Message property key for the Unclosed HTML message. */
063    public static final String MSG_INCOMPLETE_TAG = "javadoc.incompleteTag";
064
065    /** Message property key for the Unclosed HTML message. */
066    public static final String MSG_UNCLOSED_HTML = "javadoc.unclosedHtml";
067
068    /** Message property key for the Extra HTML message. */
069    public static final String MSG_EXTRA_HTML = "javadoc.extraHtml";
070
071    /** HTML tags that do not require a close tag. */
072    private static final Set<String> SINGLE_TAGS = Collections.unmodifiableSortedSet(Stream.of(
073        "br", "li", "dt", "dd", "hr", "img", "p", "td", "tr", "th")
074        .collect(Collectors.toCollection(TreeSet::new)));
075
076    /** HTML tags that are allowed in java docs.
077     * From http://www.w3schools.com/tags/default.asp
078     * The forms and structure tags are not allowed
079     */
080    private static final Set<String> ALLOWED_TAGS = Collections.unmodifiableSortedSet(Stream.of(
081        "a", "abbr", "acronym", "address", "area", "b", "bdo", "big",
082        "blockquote", "br", "caption", "cite", "code", "colgroup", "dd",
083        "del", "div", "dfn", "dl", "dt", "em", "fieldset", "font", "h1",
084        "h2", "h3", "h4", "h5", "h6", "hr", "i", "img", "ins", "kbd",
085        "li", "ol", "p", "pre", "q", "samp", "small", "span", "strong",
086        "style", "sub", "sup", "table", "tbody", "td", "tfoot", "th",
087        "thead", "tr", "tt", "u", "ul", "var")
088        .collect(Collectors.toCollection(TreeSet::new)));
089
090    /** The scope to check. */
091    private Scope scope = Scope.PRIVATE;
092
093    /** The visibility scope where Javadoc comments shouldn't be checked. **/
094    private Scope excludeScope;
095
096    /** Format for matching the end of a sentence. */
097    private Pattern endOfSentenceFormat = Pattern.compile("([.?!][ \t\n\r\f<])|([.?!]$)");
098
099    /**
100     * Indicates if the first sentence should be checked for proper end of
101     * sentence punctuation.
102     */
103    private boolean checkFirstSentence = true;
104
105    /**
106     * Indicates if the HTML within the comment should be checked.
107     */
108    private boolean checkHtml = true;
109
110    /**
111     * Indicates if empty javadoc statements should be checked.
112     */
113    private boolean checkEmptyJavadoc;
114
115    @Override
116    public int[] getDefaultTokens() {
117        return getAcceptableTokens();
118    }
119
120    @Override
121    public int[] getAcceptableTokens() {
122        return new int[] {
123            TokenTypes.ANNOTATION_DEF,
124            TokenTypes.ANNOTATION_FIELD_DEF,
125            TokenTypes.CLASS_DEF,
126            TokenTypes.CTOR_DEF,
127            TokenTypes.ENUM_CONSTANT_DEF,
128            TokenTypes.ENUM_DEF,
129            TokenTypes.INTERFACE_DEF,
130            TokenTypes.METHOD_DEF,
131            TokenTypes.PACKAGE_DEF,
132            TokenTypes.VARIABLE_DEF,
133        };
134    }
135
136    @Override
137    public int[] getRequiredTokens() {
138        return CommonUtils.EMPTY_INT_ARRAY;
139    }
140
141    @Override
142    public void visitToken(DetailAST ast) {
143        if (shouldCheck(ast)) {
144            final FileContents contents = getFileContents();
145            // Need to start searching for the comment before the annotations
146            // that may exist. Even if annotations are not defined on the
147            // package, the ANNOTATIONS AST is defined.
148            final TextBlock textBlock =
149                contents.getJavadocBefore(ast.getFirstChild().getLineNo());
150
151            checkComment(ast, textBlock);
152        }
153    }
154
155    /**
156     * Whether we should check this node.
157     * @param ast a given node.
158     * @return whether we should check a given node.
159     */
160    private boolean shouldCheck(final DetailAST ast) {
161        boolean check = false;
162
163        if (ast.getType() == TokenTypes.PACKAGE_DEF) {
164            check = getFileContents().inPackageInfo();
165        }
166        else if (!ScopeUtils.isInCodeBlock(ast)) {
167            final Scope customScope;
168
169            if (ScopeUtils.isInInterfaceOrAnnotationBlock(ast)
170                    || ast.getType() == TokenTypes.ENUM_CONSTANT_DEF) {
171                customScope = Scope.PUBLIC;
172            }
173            else {
174                customScope = ScopeUtils.getScopeFromMods(ast.findFirstToken(TokenTypes.MODIFIERS));
175            }
176            final Scope surroundingScope = ScopeUtils.getSurroundingScope(ast);
177
178            check = customScope.isIn(scope)
179                    && (surroundingScope == null || surroundingScope.isIn(scope))
180                    && (excludeScope == null
181                        || !customScope.isIn(excludeScope)
182                        || surroundingScope != null
183                            && !surroundingScope.isIn(excludeScope));
184        }
185        return check;
186    }
187
188    /**
189     * Performs the various checks against the Javadoc comment.
190     *
191     * @param ast the AST of the element being documented
192     * @param comment the source lines that make up the Javadoc comment.
193     *
194     * @see #checkFirstSentenceEnding(DetailAST, TextBlock)
195     * @see #checkHtmlTags(DetailAST, TextBlock)
196     */
197    private void checkComment(final DetailAST ast, final TextBlock comment) {
198        if (comment == null) {
199            // checking for missing docs in JavadocStyleCheck is not consistent
200            // with the rest of CheckStyle...  Even though, I didn't think it
201            // made sense to make another check just to ensure that the
202            // package-info.java file actually contains package Javadocs.
203            if (getFileContents().inPackageInfo()) {
204                log(ast.getLineNo(), MSG_JAVADOC_MISSING);
205            }
206        }
207        else {
208            if (checkFirstSentence) {
209                checkFirstSentenceEnding(ast, comment);
210            }
211
212            if (checkHtml) {
213                checkHtmlTags(ast, comment);
214            }
215
216            if (checkEmptyJavadoc) {
217                checkJavadocIsNotEmpty(comment);
218            }
219        }
220    }
221
222    /**
223     * Checks that the first sentence ends with proper punctuation.  This method
224     * uses a regular expression that checks for the presence of a period,
225     * question mark, or exclamation mark followed either by whitespace, an
226     * HTML element, or the end of string. This method ignores {_AT_inheritDoc}
227     * comments for TokenTypes that are valid for {_AT_inheritDoc}.
228     *
229     * @param ast the current node
230     * @param comment the source lines that make up the Javadoc comment.
231     */
232    private void checkFirstSentenceEnding(final DetailAST ast, TextBlock comment) {
233        final String commentText = getCommentText(comment.getText());
234
235        if (!commentText.isEmpty()
236            && !endOfSentenceFormat.matcher(commentText).find()
237            && !(commentText.startsWith("{@inheritDoc}")
238            && JavadocTagInfo.INHERIT_DOC.isValidOn(ast))) {
239            log(comment.getStartLineNo(), MSG_NO_PERIOD);
240        }
241    }
242
243    /**
244     * Checks that the Javadoc is not empty.
245     *
246     * @param comment the source lines that make up the Javadoc comment.
247     */
248    private void checkJavadocIsNotEmpty(TextBlock comment) {
249        final String commentText = getCommentText(comment.getText());
250
251        if (commentText.isEmpty()) {
252            log(comment.getStartLineNo(), MSG_EMPTY);
253        }
254    }
255
256    /**
257     * Returns the comment text from the Javadoc.
258     * @param comments the lines of Javadoc.
259     * @return a comment text String.
260     */
261    private static String getCommentText(String... comments) {
262        final StringBuilder builder = new StringBuilder();
263        for (final String line : comments) {
264            final int textStart = findTextStart(line);
265
266            if (textStart != -1) {
267                if (line.charAt(textStart) == '@') {
268                    //we have found the tag section
269                    break;
270                }
271                builder.append(line.substring(textStart));
272                trimTail(builder);
273                builder.append('\n');
274            }
275        }
276
277        return builder.toString().trim();
278    }
279
280    /**
281     * Finds the index of the first non-whitespace character ignoring the
282     * Javadoc comment start and end strings (&#47** and *&#47) as well as any
283     * leading asterisk.
284     * @param line the Javadoc comment line of text to scan.
285     * @return the int index relative to 0 for the start of text
286     *         or -1 if not found.
287     */
288    private static int findTextStart(String line) {
289        int textStart = -1;
290        for (int i = 0; i < line.length();) {
291            if (!Character.isWhitespace(line.charAt(i))) {
292                if (line.regionMatches(i, "/**", 0, "/**".length())) {
293                    i += 2;
294                }
295                else if (line.regionMatches(i, "*/", 0, 2)) {
296                    i++;
297                }
298                else if (line.charAt(i) != '*') {
299                    textStart = i;
300                    break;
301                }
302            }
303            i++;
304        }
305        return textStart;
306    }
307
308    /**
309     * Trims any trailing whitespace or the end of Javadoc comment string.
310     * @param builder the StringBuilder to trim.
311     */
312    private static void trimTail(StringBuilder builder) {
313        int index = builder.length() - 1;
314        while (true) {
315            if (Character.isWhitespace(builder.charAt(index))) {
316                builder.deleteCharAt(index);
317            }
318            else if (index > 0 && builder.charAt(index) == '/'
319                    && builder.charAt(index - 1) == '*') {
320                builder.deleteCharAt(index);
321                builder.deleteCharAt(index - 1);
322                index--;
323                while (builder.charAt(index - 1) == '*') {
324                    builder.deleteCharAt(index - 1);
325                    index--;
326                }
327            }
328            else {
329                break;
330            }
331            index--;
332        }
333    }
334
335    /**
336     * Checks the comment for HTML tags that do not have a corresponding close
337     * tag or a close tag that has no previous open tag.  This code was
338     * primarily copied from the DocCheck checkHtml method.
339     *
340     * @param ast the node with the Javadoc
341     * @param comment the {@code TextBlock} which represents
342     *                 the Javadoc comment.
343     */
344    // -@cs[ReturnCount] Too complex to break apart.
345    private void checkHtmlTags(final DetailAST ast, final TextBlock comment) {
346        final int lineNo = comment.getStartLineNo();
347        final Deque<HtmlTag> htmlStack = new ArrayDeque<>();
348        final String[] text = comment.getText();
349
350        final TagParser parser = new TagParser(text, lineNo);
351
352        while (parser.hasNextTag()) {
353            final HtmlTag tag = parser.nextTag();
354
355            if (tag.isIncompleteTag()) {
356                log(tag.getLineNo(), MSG_INCOMPLETE_TAG,
357                    text[tag.getLineNo() - lineNo]);
358                return;
359            }
360            if (tag.isClosedTag()) {
361                //do nothing
362                continue;
363            }
364            if (tag.isCloseTag()) {
365                // We have found a close tag.
366                if (isExtraHtml(tag.getId(), htmlStack)) {
367                    // No corresponding open tag was found on the stack.
368                    log(tag.getLineNo(),
369                        tag.getPosition(),
370                        MSG_EXTRA_HTML,
371                        tag);
372                }
373                else {
374                    // See if there are any unclosed tags that were opened
375                    // after this one.
376                    checkUnclosedTags(htmlStack, tag.getId());
377                }
378            }
379            else {
380                //We only push html tags that are allowed
381                if (isAllowedTag(tag)) {
382                    htmlStack.push(tag);
383                }
384            }
385        }
386
387        // Identify any tags left on the stack.
388        // Skip multiples, like <b>...<b>
389        String lastFound = "";
390        final List<String> typeParameters = CheckUtils.getTypeParameterNames(ast);
391        for (final HtmlTag htmlTag : htmlStack) {
392            if (!isSingleTag(htmlTag)
393                && !htmlTag.getId().equals(lastFound)
394                && !typeParameters.contains(htmlTag.getId())) {
395                log(htmlTag.getLineNo(), htmlTag.getPosition(), MSG_UNCLOSED_HTML, htmlTag);
396                lastFound = htmlTag.getId();
397            }
398        }
399    }
400
401    /**
402     * Checks to see if there are any unclosed tags on the stack.  The token
403     * represents a html tag that has been closed and has a corresponding open
404     * tag on the stack.  Any tags, except single tags, that were opened
405     * (pushed on the stack) after the token are missing a close.
406     *
407     * @param htmlStack the stack of opened HTML tags.
408     * @param token the current HTML tag name that has been closed.
409     */
410    private void checkUnclosedTags(Deque<HtmlTag> htmlStack, String token) {
411        final Deque<HtmlTag> unclosedTags = new ArrayDeque<>();
412        HtmlTag lastOpenTag = htmlStack.pop();
413        while (!token.equalsIgnoreCase(lastOpenTag.getId())) {
414            // Find unclosed elements. Put them on a stack so the
415            // output order won't be back-to-front.
416            if (isSingleTag(lastOpenTag)) {
417                lastOpenTag = htmlStack.pop();
418            }
419            else {
420                unclosedTags.push(lastOpenTag);
421                lastOpenTag = htmlStack.pop();
422            }
423        }
424
425        // Output the unterminated tags, if any
426        // Skip multiples, like <b>..<b>
427        String lastFound = "";
428        for (final HtmlTag htag : unclosedTags) {
429            lastOpenTag = htag;
430            if (lastOpenTag.getId().equals(lastFound)) {
431                continue;
432            }
433            lastFound = lastOpenTag.getId();
434            log(lastOpenTag.getLineNo(),
435                lastOpenTag.getPosition(),
436                MSG_UNCLOSED_HTML,
437                lastOpenTag);
438        }
439    }
440
441    /**
442     * Determines if the HtmlTag is one which does not require a close tag.
443     *
444     * @param tag the HtmlTag to check.
445     * @return {@code true} if the HtmlTag is a single tag.
446     */
447    private static boolean isSingleTag(HtmlTag tag) {
448        // If its a singleton tag (<p>, <br>, etc.), ignore it
449        // Can't simply not put them on the stack, since singletons
450        // like <dt> and <dd> (unhappily) may either be terminated
451        // or not terminated. Both options are legal.
452        return SINGLE_TAGS.contains(tag.getId().toLowerCase(Locale.ENGLISH));
453    }
454
455    /**
456     * Determines if the HtmlTag is one which is allowed in a javadoc.
457     *
458     * @param tag the HtmlTag to check.
459     * @return {@code true} if the HtmlTag is an allowed html tag.
460     */
461    private static boolean isAllowedTag(HtmlTag tag) {
462        return ALLOWED_TAGS.contains(tag.getId().toLowerCase(Locale.ENGLISH));
463    }
464
465    /**
466     * Determines if the given token is an extra HTML tag. This indicates that
467     * a close tag was found that does not have a corresponding open tag.
468     *
469     * @param token an HTML tag id for which a close was found.
470     * @param htmlStack a Stack of previous open HTML tags.
471     * @return {@code false} if a previous open tag was found
472     *         for the token.
473     */
474    private static boolean isExtraHtml(String token, Deque<HtmlTag> htmlStack) {
475        boolean isExtra = true;
476        for (final HtmlTag tag : htmlStack) {
477            // Loop, looking for tags that are closed.
478            // The loop is needed in case there are unclosed
479            // tags on the stack. In that case, the stack would
480            // not be empty, but this tag would still be extra.
481            if (token.equalsIgnoreCase(tag.getId())) {
482                isExtra = false;
483                break;
484            }
485        }
486
487        return isExtra;
488    }
489
490    /**
491     * Sets the scope to check.
492     * @param scope a scope.
493     */
494    public void setScope(Scope scope) {
495        this.scope = scope;
496    }
497
498    /**
499     * Set the excludeScope.
500     * @param excludeScope a scope.
501     */
502    public void setExcludeScope(Scope excludeScope) {
503        this.excludeScope = excludeScope;
504    }
505
506    /**
507     * Set the format for matching the end of a sentence.
508     * @param pattern a pattern.
509     */
510    public void setEndOfSentenceFormat(Pattern pattern) {
511        endOfSentenceFormat = pattern;
512    }
513
514    /**
515     * Sets the flag that determines if the first sentence is checked for
516     * proper end of sentence punctuation.
517     * @param flag {@code true} if the first sentence is to be checked
518     */
519    public void setCheckFirstSentence(boolean flag) {
520        checkFirstSentence = flag;
521    }
522
523    /**
524     * Sets the flag that determines if HTML checking is to be performed.
525     * @param flag {@code true} if HTML checking is to be performed.
526     */
527    public void setCheckHtml(boolean flag) {
528        checkHtml = flag;
529    }
530
531    /**
532     * Sets the flag that determines if empty Javadoc checking should be done.
533     * @param flag {@code true} if empty Javadoc checking should be done.
534     */
535    public void setCheckEmptyJavadoc(boolean flag) {
536        checkEmptyJavadoc = flag;
537    }
538}