| 1 | //////////////////////////////////////////////////////////////////////////////// | |
| 2 | // checkstyle: Checks Java source code for adherence to a set of rules. | |
| 3 | // Copyright (C) 2001-2017 the original author or authors. | |
| 4 | // | |
| 5 | // This library is free software; you can redistribute it and/or | |
| 6 | // modify it under the terms of the GNU Lesser General Public | |
| 7 | // License as published by the Free Software Foundation; either | |
| 8 | // version 2.1 of the License, or (at your option) any later version. | |
| 9 | // | |
| 10 | // This library is distributed in the hope that it will be useful, | |
| 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 13 | // Lesser General Public License for more details. | |
| 14 | // | |
| 15 | // You should have received a copy of the GNU Lesser General Public | |
| 16 | // License along with this library; if not, write to the Free Software | |
| 17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
| 18 | //////////////////////////////////////////////////////////////////////////////// | |
| 19 | ||
| 20 | package com.puppycrawl.tools.checkstyle.checks; | |
| 21 | ||
| 22 | import java.util.List; | |
| 23 | import java.util.Map; | |
| 24 | import java.util.regex.Matcher; | |
| 25 | import java.util.regex.Pattern; | |
| 26 | ||
| 27 | import com.puppycrawl.tools.checkstyle.api.AbstractCheck; | |
| 28 | import com.puppycrawl.tools.checkstyle.api.DetailAST; | |
| 29 | import com.puppycrawl.tools.checkstyle.api.TextBlock; | |
| 30 | import com.puppycrawl.tools.checkstyle.api.TokenTypes; | |
| 31 | import com.puppycrawl.tools.checkstyle.utils.CommonUtils; | |
| 32 | ||
| 33 | /** | |
| 34 | * <p> | |
| 35 | * Restrict using <a href = | |
| 36 | * "http://docs.oracle.com/javase/specs/jls/se8/html/jls-3.html#jls-3.3"> | |
| 37 | * Unicode escapes</a> (such as {@code \u221e}). | |
| 38 | * It is possible to allow using escapes for | |
| 39 | * <a href="https://en.wiktionary.org/wiki/Appendix:Control_characters"> | |
| 40 | * non-printable(control) characters</a>. | |
| 41 | * Also, this check can be configured to allow escapes | |
| 42 | * if a trailing comment is present. One option allows | |
| 43 | * escapes if the literal contains only escapes. Another option | |
| 44 | * allows escapes for non-printable (e.g. whitespace) characters. | |
| 45 | * </p> | |
| 46 | * <p> | |
| 47 | * Examples of using Unicode:</p> | |
| 48 | * <pre> | |
| 49 | * String unitAbbrev = "μs"; // Best: perfectly clear even without a comment. | |
| 50 | * String unitAbbrev = "\u03bcs"; // Poor: the reader has no idea what this is. | |
| 51 | * </pre> | |
| 52 | * <p> | |
| 53 | * An example of how to configure the check is: | |
| 54 | * </p> | |
| 55 | * <pre> | |
| 56 | * <module name="AvoidEscapedUnicodeCharacters"/> | |
| 57 | * </pre> | |
| 58 | * <p> | |
| 59 | * An example of non-printable(control) characters. | |
| 60 | * </p> | |
| 61 | * <pre> | |
| 62 | * return '\ufeff' + content; // byte order mark | |
| 63 | * </pre> | |
| 64 | * <p> | |
| 65 | * An example of how to configure the check to allow using escapes | |
| 66 | * for non-printable(control) characters: | |
| 67 | * </p> | |
| 68 | * <pre> | |
| 69 | * <module name="AvoidEscapedUnicodeCharacters"> | |
| 70 | * <property name="allowEscapesForControlCharacters" value="true"/> | |
| 71 | * </module> | |
| 72 | * </pre> | |
| 73 | * <p> | |
| 74 | * Example of using escapes with trail comment: | |
| 75 | * </p> | |
| 76 | * <pre> | |
| 77 | * String unitAbbrev = "\u03bcs"; // Greek letter mu, "s" | |
| 78 | * </pre> | |
| 79 | * <p>An example of how to configure the check to allow using escapes | |
| 80 | * if trail comment is present: | |
| 81 | * </p> | |
| 82 | * <pre> | |
| 83 | * <module name="AvoidEscapedUnicodeCharacters"> | |
| 84 | * <property name="allowByTailComment" value="true"/> | |
| 85 | * </module> | |
| 86 | * </pre> | |
| 87 | * <p>Example of using escapes if literal contains only them: | |
| 88 | * </p> | |
| 89 | * <pre> | |
| 90 | * String unitAbbrev = "\u03bc\u03bc\u03bc"; | |
| 91 | * </pre> | |
| 92 | * <p>An example of how to configure the check to allow escapes | |
| 93 | * if literal contains only them: | |
| 94 | * </p> | |
| 95 | * <pre> | |
| 96 | * <module name="AvoidEscapedUnicodeCharacters"> | |
| 97 | * <property name="allowIfAllCharactersEscaped" value="true"/> | |
| 98 | * </module> | |
| 99 | * </pre> | |
| 100 | * <p>An example of how to configure the check to allow non-printable escapes: | |
| 101 | * </p> | |
| 102 | * <pre> | |
| 103 | * <module name="AvoidEscapedUnicodeCharacters"> | |
| 104 | * <property name="allowNonPrintableEscapes" value="true"/> | |
| 105 | * </module> | |
| 106 | * </pre> | |
| 107 | * | |
| 108 | * @author maxvetrenko | |
| 109 | * | |
| 110 | */ | |
| 111 | public class AvoidEscapedUnicodeCharactersCheck | |
| 112 | extends AbstractCheck { | |
| 113 | /** | |
| 114 | * A key is pointing to the warning message text in "messages.properties" | |
| 115 | * file. | |
| 116 | */ | |
| 117 | public static final String MSG_KEY = "forbid.escaped.unicode.char"; | |
| 118 | ||
| 119 | /** Regular expression for Unicode chars. */ | |
| 120 | private static final Pattern UNICODE_REGEXP = Pattern.compile("\\\\u[a-fA-F0-9]{4}"); | |
| 121 | ||
| 122 | /** | |
| 123 | * Regular expression for Unicode control characters. | |
| 124 | * | |
| 125 | * @see <a href="https://en.wiktionary.org/wiki/Appendix:Control_characters"> | |
| 126 | * Appendix:Control characters</a> | |
| 127 | */ | |
| 128 | private static final Pattern UNICODE_CONTROL = Pattern.compile("\\\\(u|U)" | |
| 129 | + "(00[0-1][0-9A-Fa-f]|00[8-9][0-9A-Fa-f]|00(a|A)(d|D)|034(f|F)|070(f|F)" | |
| 130 | + "|180(e|E)|200[b-fB-F]|202[a-eA-E]|206[0-4a-fA-F]" | |
| 131 | + "|[fF]{3}[9a-bA-B]|[fF][eE][fF]{2})"); | |
| 132 | ||
| 133 | /** Regular expression for all escaped chars. */ | |
| 134 | private static final Pattern ALL_ESCAPED_CHARS = | |
| 135 | Pattern.compile("^((\\\\u)[a-fA-F0-9]{4}" | |
| 136 | + "||\\\\b|\\\\t|\\\\n|\\\\f|\\\\r|\\\\|\"|\')+$"); | |
| 137 | ||
| 138 | /** Regular expression for escaped backslash. */ | |
| 139 | private static final Pattern ESCAPED_BACKSLASH = Pattern.compile("\\\\\\\\"); | |
| 140 | ||
| 141 | /** Regular expression for non-printable unicode chars. */ | |
| 142 | private static final Pattern NON_PRINTABLE_CHARS = Pattern.compile("\\\\u1680|\\\\u2028" | |
| 143 | + "|\\\\u2029|\\\\u205(f|F)|\\\\u3000|\\\\u2007|\\\\u2000|\\\\u200(a|A)" | |
| 144 | + "|\\\\u007(F|f)|\\\\u009(f|F)|\\\\u(f|F){4}|\\\\u007(F|f)|\\\\u00(a|A)(d|D)" | |
| 145 | + "|\\\\u0600|\\\\u061(c|C)|\\\\u06(d|D){2}|\\\\u070(f|F)|\\\\u1680|\\\\u180(e|E)" | |
| 146 | + "|\\\\u2000|\\\\u2028|\\\\u205(f|F)|\\\\u2066|\\\\u2067|\\\\u2068|\\\\u2069" | |
| 147 | + "|\\\\u206(a|A)|\\\\u(d|D)800|\\\\u(f|F)(e|E)(f|F){2}|\\\\u(f|F){3}9" | |
| 148 | + "|\\\\u(f|F){3}(a|A)|\\\\u0020|\\\\u00(a|A)0|\\\\u00(a|A)(d|D)|\\\\u0604" | |
| 149 | + "|\\\\u061(c|C)|\\\\u06(d|D){2}|\\\\u070(f|F)|\\\\u1680|\\\\u180(e|E)|\\\\u200(f|F)" | |
| 150 | + "|\\\\u202(f|F)|\\\\u2064|\\\\u2066|\\\\u2067|\\\\u2068|\\\\u2069|\\\\u206(f|F)" | |
| 151 | + "|\\\\u(f|F)8(f|F){2}|\\\\u(f|F)(e|E)(f|F){2}|\\\\u(f|F){3}9|\\\\u(f|F){3}(b|B)" | |
| 152 | + "|\\\\u05(d|D)0|\\\\u05(f|F)3|\\\\u0600|\\\\u0750|\\\\u0(e|E)00|\\\\u1(e|E)00" | |
| 153 | + "|\\\\u2100|\\\\u(f|F)(b|B)50|\\\\u(f|F)(e|E)70|\\\\u(F|f){2}61|\\\\u04(f|F)9" | |
| 154 | + "|\\\\u05(b|B)(e|E)|\\\\u05(e|E)(a|A)|\\\\u05(f|F)4|\\\\u06(f|F){2}" | |
| 155 | + "|\\\\u077(f|F)|\\\\u0(e|E)7(f|F)|\\\\u20(a|A)(f|F)|\\\\u213(a|A)|\\\\u0000" | |
| 156 | + "|\\\\u(f|F)(d|D)(f|F){2}|\\\\u(f|F)(e|E)(f|F){2}|\\\\u(f|F){2}(d|D)(c|C)" | |
| 157 | + "|\\\\u2002|\\\\u0085|\\\\u200(a|A)|\\\\u2005|\\\\u2000|\\\\u2029|\\\\u000(B|b)" | |
| 158 | + "|\\\\u2008|\\\\u2003|\\\\u205(f|F)|\\\\u1680|\\\\u0009|\\\\u0020|\\\\u2006" | |
| 159 | + "|\\\\u2001|\\\\u202(f|F)|\\\\u00(a|A)0|\\\\u000(c|C)|\\\\u2009|\\\\u2004|\\\\u2028" | |
| 160 | + "|\\\\u2028|\\\\u2007|\\\\u2004|\\\\u2028|\\\\u2007|\\\\u2025" | |
| 161 | + "|\\\\u(f|F){2}0(e|E)|\\\\u(f|F){2}61"); | |
| 162 | ||
| 163 | /** Cpp style comments. */ | |
| 164 | private Map<Integer, TextBlock> singlelineComments; | |
| 165 | /** C style comments. */ | |
| 166 | private Map<Integer, List<TextBlock>> blockComments; | |
| 167 | ||
| 168 | /** Allow the use of escapes for non-printable (control) characters. */ | |
| 169 | private boolean allowEscapesForControlCharacters; | |
| 170 | ||
| 171 | /** Allow the use of escapes if a trailing comment is present. */ | |
| 172 | private boolean allowByTailComment; | |
| 173 | ||
| 174 | /** Allow if all characters in literal are escaped. */ | |
| 175 | private boolean allowIfAllCharactersEscaped; | |
| 176 | ||
| 177 | /** Allow escapes for non-printable characters (those matched by NON_PRINTABLE_CHARS), such as spaces. */ | |
| 178 | private boolean allowNonPrintableEscapes; | |
| 179 | ||
| 180 | /** | |
| 181 | * Sets whether escapes are allowed for non-printable (control) characters. | |
| 182 | * @param allow {@code true} to allow such escapes. | |
| 183 | */ | |
| 184 | public final void setAllowEscapesForControlCharacters(boolean allow) { | |
| 185 | allowEscapesForControlCharacters = allow; | |
| 186 | } | |
| 187 | ||
| 188 | /** | |
| 189 | * Sets whether escapes are allowed when a trailing comment is present. | |
| 190 | * @param allow {@code true} to allow such escapes. | |
| 191 | */ | |
| 192 | public final void setAllowByTailComment(boolean allow) { | |
| 193 | allowByTailComment = allow; | |
| 194 | } | |
| 195 | ||
| 196 | /** | |
| 197 | * Sets whether escapes are allowed when all characters in the literal are escaped. | |
| 198 | * @param allow {@code true} to allow such escapes. | |
| 199 | */ | |
| 200 | public final void setAllowIfAllCharactersEscaped(boolean allow) { | |
| 201 | allowIfAllCharactersEscaped = allow; | |
| 202 | } | |
| 203 | ||
| 204 | /** | |
| 205 | * Sets whether escapes are allowed for non-printable characters. | |
| 206 | * @param allow {@code true} to allow such escapes. | |
| 207 | */ | |
| 208 | public final void setAllowNonPrintableEscapes(boolean allow) { | |
| 209 | allowNonPrintableEscapes = allow; | |
| 210 | } | |
| 211 | ||
/** Returns the same token types as {@link #getAcceptableTokens()}. */
| 212 | @Override | |
| 213 | public int[] getDefaultTokens() { | |
| 214 |
1
1. getDefaultTokens : mutated return of Object value for com/puppycrawl/tools/checkstyle/checks/AvoidEscapedUnicodeCharactersCheck::getDefaultTokens to ( if (x != null) null else throw new RuntimeException ) → KILLED |
return getAcceptableTokens(); |
| 215 | } | |
| 216 | ||
/** Returns a new array holding the STRING_LITERAL and CHAR_LITERAL token types. */
| 217 | @Override | |
| 218 | public int[] getAcceptableTokens() { | |
| 219 |
1
1. getAcceptableTokens : mutated return of Object value for com/puppycrawl/tools/checkstyle/checks/AvoidEscapedUnicodeCharactersCheck::getAcceptableTokens to ( if (x != null) null else throw new RuntimeException ) → KILLED |
return new int[] {TokenTypes.STRING_LITERAL, TokenTypes.CHAR_LITERAL}; |
| 220 | } | |
| 221 | ||
/** Returns the same token types as {@link #getAcceptableTokens()}. */
| 222 | @Override | |
| 223 | public int[] getRequiredTokens() { | |
| 224 |
1
1. getRequiredTokens : mutated return of Object value for com/puppycrawl/tools/checkstyle/checks/AvoidEscapedUnicodeCharactersCheck::getRequiredTokens to ( if (x != null) null else throw new RuntimeException ) → KILLED |
return getAcceptableTokens(); |
| 225 | } | |
| 226 | ||
/** Stores the single-line and block comments of the file contents; hasTrailComment reads them. */
| 227 | @Override | |
| 228 | public void beginTree(DetailAST rootAST) { | |
| 229 | singlelineComments = getFileContents().getSingleLineComments(); | |
| 230 | blockComments = getFileContents().getBlockComments(); | |
| 231 | } | |
| 232 | ||
/**
 * Logs MSG_KEY at the token's line when its text contains a Unicode escape
 * and no exemption applies. Because the AND operator binds tighter than OR,
 * the exemptions are evaluated as four alternatives:
 * allowByTailComment together with hasTrailComment;
 * isAllCharactersEscaped (which itself checks allowIfAllCharactersEscaped);
 * allowEscapesForControlCharacters together with UNICODE_CONTROL-only escapes;
 * allowNonPrintableEscapes together with NON_PRINTABLE_CHARS-only escapes.
 * @param ast the token to check; its text is examined as the literal.
 */
| 233 | @Override | |
| 234 | public void visitToken(DetailAST ast) { | |
| 235 | ||
| 236 | final String literal = ast.getText(); | |
| 237 | ||
| 238 |
3
1. visitToken : negated conditional → KILLED 2. visitToken : negated conditional → KILLED 3. visitToken : negated conditional → KILLED |
if (hasUnicodeChar(literal) && !(allowByTailComment && hasTrailComment(ast) |
| 239 |
2
1. visitToken : negated conditional → KILLED 2. visitToken : negated conditional → KILLED |
|| isAllCharactersEscaped(literal) |
| 240 | || allowEscapesForControlCharacters | |
| 241 |
2
1. visitToken : negated conditional → KILLED 2. visitToken : negated conditional → KILLED |
&& isOnlyUnicodeValidChars(literal, UNICODE_CONTROL) |
| 242 | || allowNonPrintableEscapes | |
| 243 |
1
1. visitToken : negated conditional → KILLED |
&& isOnlyUnicodeValidChars(literal, NON_PRINTABLE_CHARS))) { |
| 244 |
1
1. visitToken : removed call to com/puppycrawl/tools/checkstyle/checks/AvoidEscapedUnicodeCharactersCheck::log → KILLED |
log(ast.getLineNo(), MSG_KEY); |
| 245 | } | |
| 246 | } | |
| 247 | ||
| 248 | /** | |
| 249 | * Checks if literal has Unicode escapes, ignoring escaped (doubled) backslashes. | |
| 250 | * @param literal String literal. | |
| 251 | * @return true if UNICODE_REGEXP is found once ESCAPED_BACKSLASH matches are removed. | |
| 252 | */ | |
| 253 | private static boolean hasUnicodeChar(String literal) { | |
| 254 | final String literalWithoutEscapedBackslashes = | |
| 255 | ESCAPED_BACKSLASH.matcher(literal).replaceAll(""); | |
| 256 |
1
1. hasUnicodeChar : replaced return of integer sized value with (x == 0 ? 1 : 0) → KILLED |
return UNICODE_REGEXP.matcher(literalWithoutEscapedBackslashes).find(); |
| 257 | } | |
| 258 | ||
| 259 | /** | |
| 260 | * Checks if the literal has as many matches of the given pattern as of UNICODE_REGEXP (presumably: every escape is a valid one). | |
| 261 | * @param literal String literal. | |
| 262 | * @param pattern RegExp for valid characters. | |
| 263 | * @return true, if both match counts in the String literal are equal. | |
| 264 | */ | |
| 265 | private static boolean isOnlyUnicodeValidChars(String literal, Pattern pattern) { | |
| 266 | final int unicodeMatchesCounter = | |
| 267 | countMatches(UNICODE_REGEXP, literal); | |
| 268 | final int unicodeValidMatchesCounter = | |
| 269 | countMatches(pattern, literal); | |
| 270 |
3
1. isOnlyUnicodeValidChars : Replaced integer subtraction with addition → KILLED 2. isOnlyUnicodeValidChars : negated conditional → KILLED 3. isOnlyUnicodeValidChars : replaced return of integer sized value with (x == 0 ? 1 : 0) → KILLED |
return unicodeMatchesCounter - unicodeValidMatchesCounter == 0; |
| 271 | } | |
| 272 | ||
| 273 | /** | |
| 274 | * Checks if the line of the ast token has a single-line comment or a trailing block comment. | |
| 275 | * @param ast current token; only its line number is used. | |
| 276 | * @return true if the line has a single-line comment, or the last block comment listed for it is trailing. | |
| 277 | */ | |
| 278 | private boolean hasTrailComment(DetailAST ast) { | |
| 279 | boolean result = false; | |
| 280 | final int lineNo = ast.getLineNo(); | |
| 281 |
1
1. hasTrailComment : negated conditional → KILLED |
if (singlelineComments.containsKey(lineNo)) { |
| 282 | result = true; | |
| 283 | } | |
| 284 | else { | |
| 285 | final List<TextBlock> commentList = blockComments.get(lineNo); | |
| 286 |
1
1. hasTrailComment : negated conditional → KILLED |
if (commentList != null) { |
| 287 |
1
1. hasTrailComment : Replaced integer subtraction with addition → KILLED |
final TextBlock comment = commentList.get(commentList.size() - 1); |
| 288 |
1
1. hasTrailComment : Replaced integer subtraction with addition → KILLED |
final String line = getLines()[lineNo - 1]; |
| 289 | result = isTrailingBlockComment(comment, line); | |
| 290 | } | |
| 291 | } | |
| 292 |
1
1. hasTrailComment : replaced return of integer sized value with (x == 0 ? 1 : 0) → KILLED |
return result; |
| 293 | } | |
| 294 | ||
| 295 | /** | |
| 296 | * Whether the C style comment is trailing: its text array is not of length one, or the rest of the line after its end column is blank. | |
| 297 | * @param comment the comment to check. | |
| 298 | * @param line the line where the comment starts. | |
| 299 | * @return true if the comment is trailing. | |
| 300 | */ | |
| 301 | private static boolean isTrailingBlockComment(TextBlock comment, String line) { | |
| 302 |
2
1. isTrailingBlockComment : negated conditional → KILLED 2. isTrailingBlockComment : replaced return of integer sized value with (x == 0 ? 1 : 0) → KILLED |
return comment.getText().length != 1 |
| 303 |
2
1. isTrailingBlockComment : Replaced integer addition with subtraction → KILLED 2. isTrailingBlockComment : negated conditional → KILLED |
|| CommonUtils.isBlank(line.substring(comment.getEndColNo() + 1)); |
| 304 | } | |
| 305 | ||
| 306 | /** | |
| 307 | * Counts the regexp matches found in the given string. | |
| 308 | * @param pattern pattern to search for. | |
| 309 | * @param target string to search in. | |
| 310 | * @return number of successive matches reported by the matcher's find(). | |
| 311 | */ | |
| 312 | private static int countMatches(Pattern pattern, String target) { | |
| 313 | int matcherCounter = 0; | |
| 314 | final Matcher matcher = pattern.matcher(target); | |
| 315 |
1
1. countMatches : negated conditional → KILLED |
while (matcher.find()) { |
| 316 |
1
1. countMatches : Changed increment from 1 to -1 → KILLED |
matcherCounter++; |
| 317 | } | |
| 318 |
1
1. countMatches : replaced return of integer sized value with (x == 0 ? 1 : 0) → KILLED |
return matcherCounter; |
| 319 | } | |
| 320 | ||
| 321 | /** | |
| 322 | * Checks if allowIfAllCharactersEscaped is set and all characters in the String literal are escaped. | |
| 323 | * @param literal current literal; its first and last characters (presumably the quotes) are skipped. | |
| 324 | * @return true if the option is set and the remaining text matches ALL_ESCAPED_CHARS. | |
| 325 | */ | |
| 326 | private boolean isAllCharactersEscaped(String literal) { | |
| 327 |
2
1. isAllCharactersEscaped : negated conditional → KILLED 2. isAllCharactersEscaped : replaced return of integer sized value with (x == 0 ? 1 : 0) → KILLED |
return allowIfAllCharactersEscaped |
| 328 | && ALL_ESCAPED_CHARS.matcher(literal.substring(1, | |
| 329 |
2
1. isAllCharactersEscaped : Replaced integer subtraction with addition → KILLED 2. isAllCharactersEscaped : negated conditional → KILLED |
literal.length() - 1)).find(); |
| 330 | } | |
| 331 | } | |
Mutations | ||
| 214 |
1.1 |
|
| 219 |
1.1 |
|
| 224 |
1.1 |
|
| 238 |
1.1 2.2 3.3 |
|
| 239 |
1.1 2.2 |
|
| 241 |
1.1 2.2 |
|
| 243 |
1.1 |
|
| 244 |
1.1 |
|
| 256 |
1.1 |
|
| 270 |
1.1 2.2 3.3 |
|
| 281 |
1.1 |
|
| 286 |
1.1 |
|
| 287 |
1.1 |
|
| 288 |
1.1 |
|
| 292 |
1.1 |
|
| 302 |
1.1 2.2 |
|
| 303 |
1.1 2.2 |
|
| 315 |
1.1 |
|
| 316 |
1.1 |
|
| 318 |
1.1 |
|
| 327 |
1.1 2.2 |
|
| 329 |
1.1 2.2 |