1 | //////////////////////////////////////////////////////////////////////////////// | |
2 | // checkstyle: Checks Java source code for adherence to a set of rules. | |
3 | // Copyright (C) 2001-2017 the original author or authors. | |
4 | // | |
5 | // This library is free software; you can redistribute it and/or | |
6 | // modify it under the terms of the GNU Lesser General Public | |
7 | // License as published by the Free Software Foundation; either | |
8 | // version 2.1 of the License, or (at your option) any later version. | |
9 | // | |
10 | // This library is distributed in the hope that it will be useful, | |
11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | // Lesser General Public License for more details. | |
14 | // | |
15 | // You should have received a copy of the GNU Lesser General Public | |
16 | // License along with this library; if not, write to the Free Software | |
17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
18 | //////////////////////////////////////////////////////////////////////////////// | |
19 | ||
20 | package com.puppycrawl.tools.checkstyle.checks; | |
21 | ||
22 | import java.util.List; | |
23 | import java.util.Map; | |
24 | import java.util.regex.Matcher; | |
25 | import java.util.regex.Pattern; | |
26 | ||
27 | import com.puppycrawl.tools.checkstyle.api.AbstractCheck; | |
28 | import com.puppycrawl.tools.checkstyle.api.DetailAST; | |
29 | import com.puppycrawl.tools.checkstyle.api.TextBlock; | |
30 | import com.puppycrawl.tools.checkstyle.api.TokenTypes; | |
31 | import com.puppycrawl.tools.checkstyle.utils.CommonUtils; | |
32 | ||
33 | /** | |
34 | * <p> | |
35 | * Restrict using <a href = | |
36 | * "http://docs.oracle.com/javase/specs/jls/se8/html/jls-3.html#jls-3.3"> | |
37 | * Unicode escapes</a> (such as {@code \u221e}). | |
38 | * It is possible to allow using escapes for | |
39 | * <a href="https://en.wiktionary.org/wiki/Appendix:Control_characters"> | |
40 | * non-printable(control) characters</a>. | |
41 | * Also, this check can be configured to allow using escapes | |
42 | * if trail comment is present. By the option it is possible to | |
43 | * allow using escapes if literal contains only them. By the option it | |
44 | * is possible to allow using escapes for space literals. | |
45 | * </p> | |
46 | * <p> | |
47 | * Examples of using Unicode:</p> | |
48 | * <pre> | |
49 | * String unitAbbrev = "μs"; // Best: perfectly clear even without a comment. | |
50 | * String unitAbbrev = "\u03bcs"; // Poor: the reader has no idea what this is. | |
51 | * </pre> | |
52 | * <p> | |
53 | * An example of how to configure the check is: | |
54 | * </p> | |
55 | * <pre> | |
56 | * <module name="AvoidEscapedUnicodeCharacters"/> | |
57 | * </pre> | |
58 | * <p> | |
59 | * An example of non-printable(control) characters. | |
60 | * </p> | |
61 | * <pre> | |
62 | * return '\ufeff' + content; // byte order mark | |
63 | * </pre> | |
64 | * <p> | |
65 | * An example of how to configure the check to allow using escapes | |
66 | * for non-printable(control) characters: | |
67 | * </p> | |
68 | * <pre> | |
69 | * <module name="AvoidEscapedUnicodeCharacters"> | |
70 | * <property name="allowEscapesForControlCharacters" value="true"/> | |
71 | * </module> | |
72 | * </pre> | |
73 | * <p> | |
74 | * Example of using escapes with trail comment: | |
75 | * </p> | |
76 | * <pre> | |
77 | * String unitAbbrev = "\u03bcs"; // Greek letter mu, "s" | |
78 | * </pre> | |
79 | * <p>An example of how to configure the check to allow using escapes | |
80 | * if trail comment is present: | |
81 | * </p> | |
82 | * <pre> | |
83 | * <module name="AvoidEscapedUnicodeCharacters"> | |
84 | * <property name="allowByTailComment" value="true"/> | |
85 | * </module> | |
86 | * </pre> | |
87 | * <p>Example of using escapes if literal contains only them: | |
88 | * </p> | |
89 | * <pre> | |
90 | * String unitAbbrev = "\u03bc\u03bc\u03bc"; | |
91 | * </pre> | |
92 | * <p>An example of how to configure the check to allow escapes | |
93 | * if literal contains only them: | |
94 | * </p> | |
95 | * <pre> | |
96 | * <module name="AvoidEscapedUnicodeCharacters"> | |
97 | * <property name="allowIfAllCharactersEscaped" value="true"/> | |
98 | * </module> | |
99 | * </pre> | |
100 | * <p>An example of how to configure the check to allow non-printable escapes: | |
101 | * </p> | |
102 | * <pre> | |
103 | * <module name="AvoidEscapedUnicodeCharacters"> | |
104 | * <property name="allowNonPrintableEscapes" value="true"/> | |
105 | * </module> | |
106 | * </pre> | |
107 | * | |
108 | * @author maxvetrenko | |
109 | * | |
110 | */ | |
111 | public class AvoidEscapedUnicodeCharactersCheck | |
112 | extends AbstractCheck { | |
113 | /** | |
114 | * A key is pointing to the warning message text in "messages.properties" | |
115 | * file. | |
116 | */ | |
117 | public static final String MSG_KEY = "forbid.escaped.unicode.char"; | |
118 | ||
119 | /** Regular expression for Unicode chars. */ | |
120 | private static final Pattern UNICODE_REGEXP = Pattern.compile("\\\\u[a-fA-F0-9]{4}"); | |
121 | ||
122 | /** | |
123 | * Regular expression Unicode control characters. | |
124 | * | |
125 | * @see <a href="https://en.wiktionary.org/wiki/Appendix:Control_characters"> | |
126 | * Appendix:Control characters</a> | |
127 | */ | |
128 | private static final Pattern UNICODE_CONTROL = Pattern.compile("\\\\(u|U)" | |
129 | + "(00[0-1][0-9A-Fa-f]|00[8-9][0-9A-Fa-f]|00(a|A)(d|D)|034(f|F)|070(f|F)" | |
130 | + "|180(e|E)|200[b-fB-F]|202[a-eA-E]|206[0-4a-fA-F]" | |
131 | + "|[fF]{3}[9a-bA-B]|[fF][eE][fF]{2})"); | |
132 | ||
133 | /** Regular expression for all escaped chars. */ | |
134 | private static final Pattern ALL_ESCAPED_CHARS = | |
135 | Pattern.compile("^((\\\\u)[a-fA-F0-9]{4}" | |
136 | + "||\\\\b|\\\\t|\\\\n|\\\\f|\\\\r|\\\\|\"|\')+$"); | |
137 | ||
138 | /** Regular expression for escaped backslash. */ | |
139 | private static final Pattern ESCAPED_BACKSLASH = Pattern.compile("\\\\\\\\"); | |
140 | ||
141 | /** Regular expression for non-printable unicode chars. */ | |
142 | private static final Pattern NON_PRINTABLE_CHARS = Pattern.compile("\\\\u1680|\\\\u2028" | |
143 | + "|\\\\u2029|\\\\u205(f|F)|\\\\u3000|\\\\u2007|\\\\u2000|\\\\u200(a|A)" | |
144 | + "|\\\\u007(F|f)|\\\\u009(f|F)|\\\\u(f|F){4}|\\\\u007(F|f)|\\\\u00(a|A)(d|D)" | |
145 | + "|\\\\u0600|\\\\u061(c|C)|\\\\u06(d|D){2}|\\\\u070(f|F)|\\\\u1680|\\\\u180(e|E)" | |
146 | + "|\\\\u2000|\\\\u2028|\\\\u205(f|F)|\\\\u2066|\\\\u2067|\\\\u2068|\\\\u2069" | |
147 | + "|\\\\u206(a|A)|\\\\u(d|D)800|\\\\u(f|F)(e|E)(f|F){2}|\\\\u(f|F){3}9" | |
148 | + "|\\\\u(f|F){3}(a|A)|\\\\u0020|\\\\u00(a|A)0|\\\\u00(a|A)(d|D)|\\\\u0604" | |
149 | + "|\\\\u061(c|C)|\\\\u06(d|D){2}|\\\\u070(f|F)|\\\\u1680|\\\\u180(e|E)|\\\\u200(f|F)" | |
150 | + "|\\\\u202(f|F)|\\\\u2064|\\\\u2066|\\\\u2067|\\\\u2068|\\\\u2069|\\\\u206(f|F)" | |
151 | + "|\\\\u(f|F)8(f|F){2}|\\\\u(f|F)(e|E)(f|F){2}|\\\\u(f|F){3}9|\\\\u(f|F){3}(b|B)" | |
152 | + "|\\\\u05(d|D)0|\\\\u05(f|F)3|\\\\u0600|\\\\u0750|\\\\u0(e|E)00|\\\\u1(e|E)00" | |
153 | + "|\\\\u2100|\\\\u(f|F)(b|B)50|\\\\u(f|F)(e|E)70|\\\\u(F|f){2}61|\\\\u04(f|F)9" | |
154 | + "|\\\\u05(b|B)(e|E)|\\\\u05(e|E)(a|A)|\\\\u05(f|F)4|\\\\u06(f|F){2}" | |
155 | + "|\\\\u077(f|F)|\\\\u0(e|E)7(f|F)|\\\\u20(a|A)(f|F)|\\\\u213(a|A)|\\\\u0000" | |
156 | + "|\\\\u(f|F)(d|D)(f|F){2}|\\\\u(f|F)(e|E)(f|F){2}|\\\\u(f|F){2}(d|D)(c|C)" | |
157 | + "|\\\\u2002|\\\\u0085|\\\\u200(a|A)|\\\\u2005|\\\\u2000|\\\\u2029|\\\\u000(B|b)" | |
158 | + "|\\\\u2008|\\\\u2003|\\\\u205(f|F)|\\\\u1680|\\\\u0009|\\\\u0020|\\\\u2006" | |
159 | + "|\\\\u2001|\\\\u202(f|F)|\\\\u00(a|A)0|\\\\u000(c|C)|\\\\u2009|\\\\u2004|\\\\u2028" | |
160 | + "|\\\\u2028|\\\\u2007|\\\\u2004|\\\\u2028|\\\\u2007|\\\\u2025" | |
161 | + "|\\\\u(f|F){2}0(e|E)|\\\\u(f|F){2}61"); | |
162 | ||
163 | /** Cpp style comments. */ | |
164 | private Map<Integer, TextBlock> singlelineComments; | |
165 | /** C style comments. */ | |
166 | private Map<Integer, List<TextBlock>> blockComments; | |
167 | ||
168 | /** Allow use escapes for non-printable(control) characters. */ | |
169 | private boolean allowEscapesForControlCharacters; | |
170 | ||
171 | /** Allow use escapes if trail comment is present. */ | |
172 | private boolean allowByTailComment; | |
173 | ||
174 | /** Allow if all characters in literal are escaped. */ | |
175 | private boolean allowIfAllCharactersEscaped; | |
176 | ||
177 | /** Allow escapes for space literals. */ | |
178 | private boolean allowNonPrintableEscapes; | |
179 | ||
180 | /** | |
181 | * Set allowIfAllCharactersEscaped. | |
182 | * @param allow user's value. | |
183 | */ | |
184 | public final void setAllowEscapesForControlCharacters(boolean allow) { | |
185 | allowEscapesForControlCharacters = allow; | |
186 | } | |
187 | ||
188 | /** | |
189 | * Set allowByTailComment. | |
190 | * @param allow user's value. | |
191 | */ | |
192 | public final void setAllowByTailComment(boolean allow) { | |
193 | allowByTailComment = allow; | |
194 | } | |
195 | ||
196 | /** | |
197 | * Set allowIfAllCharactersEscaped. | |
198 | * @param allow user's value. | |
199 | */ | |
200 | public final void setAllowIfAllCharactersEscaped(boolean allow) { | |
201 | allowIfAllCharactersEscaped = allow; | |
202 | } | |
203 | ||
204 | /** | |
205 | * Set allowSpaceEscapes. | |
206 | * @param allow user's value. | |
207 | */ | |
208 | public final void setAllowNonPrintableEscapes(boolean allow) { | |
209 | allowNonPrintableEscapes = allow; | |
210 | } | |
211 | ||
212 | @Override | |
213 | public int[] getDefaultTokens() { | |
214 |
1
1. getDefaultTokens : mutated return of Object value for com/puppycrawl/tools/checkstyle/checks/AvoidEscapedUnicodeCharactersCheck::getDefaultTokens to ( if (x != null) null else throw new RuntimeException ) → KILLED |
return getAcceptableTokens(); |
215 | } | |
216 | ||
217 | @Override | |
218 | public int[] getAcceptableTokens() { | |
219 |
1
1. getAcceptableTokens : mutated return of Object value for com/puppycrawl/tools/checkstyle/checks/AvoidEscapedUnicodeCharactersCheck::getAcceptableTokens to ( if (x != null) null else throw new RuntimeException ) → KILLED |
return new int[] {TokenTypes.STRING_LITERAL, TokenTypes.CHAR_LITERAL}; |
220 | } | |
221 | ||
222 | @Override | |
223 | public int[] getRequiredTokens() { | |
224 |
1
1. getRequiredTokens : mutated return of Object value for com/puppycrawl/tools/checkstyle/checks/AvoidEscapedUnicodeCharactersCheck::getRequiredTokens to ( if (x != null) null else throw new RuntimeException ) → KILLED |
return getAcceptableTokens(); |
225 | } | |
226 | ||
227 | @Override | |
228 | public void beginTree(DetailAST rootAST) { | |
229 | singlelineComments = getFileContents().getSingleLineComments(); | |
230 | blockComments = getFileContents().getBlockComments(); | |
231 | } | |
232 | ||
233 | @Override | |
234 | public void visitToken(DetailAST ast) { | |
235 | ||
236 | final String literal = ast.getText(); | |
237 | ||
238 |
3
1. visitToken : negated conditional → KILLED 2. visitToken : negated conditional → KILLED 3. visitToken : negated conditional → KILLED |
if (hasUnicodeChar(literal) && !(allowByTailComment && hasTrailComment(ast) |
239 |
2
1. visitToken : negated conditional → KILLED 2. visitToken : negated conditional → KILLED |
|| isAllCharactersEscaped(literal) |
240 | || allowEscapesForControlCharacters | |
241 |
2
1. visitToken : negated conditional → KILLED 2. visitToken : negated conditional → KILLED |
&& isOnlyUnicodeValidChars(literal, UNICODE_CONTROL) |
242 | || allowNonPrintableEscapes | |
243 |
1
1. visitToken : negated conditional → KILLED |
&& isOnlyUnicodeValidChars(literal, NON_PRINTABLE_CHARS))) { |
244 |
1
1. visitToken : removed call to com/puppycrawl/tools/checkstyle/checks/AvoidEscapedUnicodeCharactersCheck::log → KILLED |
log(ast.getLineNo(), MSG_KEY); |
245 | } | |
246 | } | |
247 | ||
248 | /** | |
249 | * Checks if literal has Unicode chars. | |
250 | * @param literal String literal. | |
251 | * @return true if literal has Unicode chars. | |
252 | */ | |
253 | private static boolean hasUnicodeChar(String literal) { | |
254 | final String literalWithoutEscapedBackslashes = | |
255 | ESCAPED_BACKSLASH.matcher(literal).replaceAll(""); | |
256 |
1
1. hasUnicodeChar : replaced return of integer sized value with (x == 0 ? 1 : 0) → KILLED |
return UNICODE_REGEXP.matcher(literalWithoutEscapedBackslashes).find(); |
257 | } | |
258 | ||
259 | /** | |
260 | * Check if String literal contains Unicode control chars. | |
261 | * @param literal String literal. | |
262 | * @param pattern RegExp for valid characters. | |
263 | * @return true, if String literal contains Unicode control chars. | |
264 | */ | |
265 | private static boolean isOnlyUnicodeValidChars(String literal, Pattern pattern) { | |
266 | final int unicodeMatchesCounter = | |
267 | countMatches(UNICODE_REGEXP, literal); | |
268 | final int unicodeValidMatchesCounter = | |
269 | countMatches(pattern, literal); | |
270 |
3
1. isOnlyUnicodeValidChars : Replaced integer subtraction with addition → KILLED 2. isOnlyUnicodeValidChars : negated conditional → KILLED 3. isOnlyUnicodeValidChars : replaced return of integer sized value with (x == 0 ? 1 : 0) → KILLED |
return unicodeMatchesCounter - unicodeValidMatchesCounter == 0; |
271 | } | |
272 | ||
273 | /** | |
274 | * Check if trail comment is present after ast token. | |
275 | * @param ast current token. | |
276 | * @return true if trail comment is present after ast token. | |
277 | */ | |
278 | private boolean hasTrailComment(DetailAST ast) { | |
279 | boolean result = false; | |
280 | final int lineNo = ast.getLineNo(); | |
281 |
1
1. hasTrailComment : negated conditional → KILLED |
if (singlelineComments.containsKey(lineNo)) { |
282 | result = true; | |
283 | } | |
284 | else { | |
285 | final List<TextBlock> commentList = blockComments.get(lineNo); | |
286 |
1
1. hasTrailComment : negated conditional → KILLED |
if (commentList != null) { |
287 |
1
1. hasTrailComment : Replaced integer subtraction with addition → KILLED |
final TextBlock comment = commentList.get(commentList.size() - 1); |
288 |
1
1. hasTrailComment : Replaced integer subtraction with addition → KILLED |
final String line = getLines()[lineNo - 1]; |
289 | result = isTrailingBlockComment(comment, line); | |
290 | } | |
291 | } | |
292 |
1
1. hasTrailComment : replaced return of integer sized value with (x == 0 ? 1 : 0) → KILLED |
return result; |
293 | } | |
294 | ||
295 | /** | |
296 | * Whether the C style comment is trailing. | |
297 | * @param comment the comment to check. | |
298 | * @param line the line where the comment starts. | |
299 | * @return true if the comment is trailing. | |
300 | */ | |
301 | private static boolean isTrailingBlockComment(TextBlock comment, String line) { | |
302 |
2
1. isTrailingBlockComment : negated conditional → KILLED 2. isTrailingBlockComment : replaced return of integer sized value with (x == 0 ? 1 : 0) → KILLED |
return comment.getText().length != 1 |
303 |
2
1. isTrailingBlockComment : Replaced integer addition with subtraction → KILLED 2. isTrailingBlockComment : negated conditional → KILLED |
|| CommonUtils.isBlank(line.substring(comment.getEndColNo() + 1)); |
304 | } | |
305 | ||
306 | /** | |
307 | * Count regexp matches into String literal. | |
308 | * @param pattern pattern. | |
309 | * @param target String literal. | |
310 | * @return count of regexp matches. | |
311 | */ | |
312 | private static int countMatches(Pattern pattern, String target) { | |
313 | int matcherCounter = 0; | |
314 | final Matcher matcher = pattern.matcher(target); | |
315 |
1
1. countMatches : negated conditional → KILLED |
while (matcher.find()) { |
316 |
1
1. countMatches : Changed increment from 1 to -1 → KILLED |
matcherCounter++; |
317 | } | |
318 |
1
1. countMatches : replaced return of integer sized value with (x == 0 ? 1 : 0) → KILLED |
return matcherCounter; |
319 | } | |
320 | ||
321 | /** | |
322 | * Checks if all characters in String literal is escaped. | |
323 | * @param literal current literal. | |
324 | * @return true if all characters in String literal is escaped. | |
325 | */ | |
326 | private boolean isAllCharactersEscaped(String literal) { | |
327 |
2
1. isAllCharactersEscaped : negated conditional → KILLED 2. isAllCharactersEscaped : replaced return of integer sized value with (x == 0 ? 1 : 0) → KILLED |
return allowIfAllCharactersEscaped |
328 | && ALL_ESCAPED_CHARS.matcher(literal.substring(1, | |
329 |
2
1. isAllCharactersEscaped : Replaced integer subtraction with addition → KILLED 2. isAllCharactersEscaped : negated conditional → KILLED |
literal.length() - 1)).find(); |
330 | } | |
331 | } | |
Mutations | ||
214 |
1.1 |
|
219 |
1.1 |
|
224 |
1.1 |
|
238 |
1.1 2.2 3.3 |
|
239 |
1.1 2.2 |
|
241 |
1.1 2.2 |
|
243 |
1.1 |
|
244 |
1.1 |
|
256 |
1.1 |
|
270 |
1.1 2.2 3.3 |
|
281 |
1.1 |
|
286 |
1.1 |
|
287 |
1.1 |
|
288 |
1.1 |
|
292 |
1.1 |
|
302 |
1.1 2.2 |
|
303 |
1.1 2.2 |
|
315 |
1.1 |
|
316 |
1.1 |
|
318 |
1.1 |
|
327 |
1.1 2.2 |
|
329 |
1.1 2.2 |