1
2
3
4 package net.sourceforge.pmd.cpd;
5
6 import java.io.StringReader;
7 import java.util.*;
8
9 import net.sourceforge.pmd.lang.LanguageVersion;
10 import net.sourceforge.pmd.lang.LanguageVersionHandler;
11 import net.sourceforge.pmd.lang.TokenManager;
12 import net.sourceforge.pmd.lang.java.ast.JavaParserConstants;
13 import net.sourceforge.pmd.lang.java.ast.Token;
14
15 public class JavaTokenizer implements Tokenizer {
16
17 public static final String IGNORE_LITERALS = "ignore_literals";
18 public static final String IGNORE_IDENTIFIERS = "ignore_identifiers";
19 public static final String IGNORE_ANNOTATIONS = "ignore_annotations";
20 public static final String CPD_START = "\"CPD-START\"";
21 public static final String CPD_END = "\"CPD-END\"";
22
23 private boolean ignoreAnnotations;
24 private boolean ignoreLiterals;
25 private boolean ignoreIdentifiers;
26 List<Discarder> discarders = new ArrayList<Discarder>();
27
28
29 public void setProperties(Properties properties) {
30 ignoreAnnotations = Boolean.parseBoolean(properties.getProperty(IGNORE_ANNOTATIONS, "false"));
31 ignoreLiterals = Boolean.parseBoolean(properties.getProperty(IGNORE_LITERALS, "false"));
32 ignoreIdentifiers = Boolean.parseBoolean(properties.getProperty(IGNORE_IDENTIFIERS, "false"));
33 }
34
35 public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
36 StringBuilder stringBuilder = sourceCode.getCodeBuffer();
37
38
39 LanguageVersionHandler languageVersionHandler = LanguageVersion.JAVA_14.getLanguageVersionHandler();
40 String fileName = sourceCode.getFileName();
41 TokenManager tokenMgr = languageVersionHandler.getParser(languageVersionHandler.getDefaultParserOptions()).getTokenManager(
42 fileName, new StringReader(stringBuilder.toString()));
43 Token currentToken = (Token) tokenMgr.getNextToken();
44
45 initDiscarders();
46
47 while (currentToken.image.length() > 0) {
48 for (Discarder discarder : discarders) {
49 discarder.add(currentToken);
50 }
51
52 if (inDiscardingState()) {
53 currentToken = (Token) tokenMgr.getNextToken();
54 continue;
55 }
56
57
58 if (currentToken.kind != JavaParserConstants.SEMICOLON) {
59 processToken(tokenEntries, fileName, currentToken);
60 }
61 currentToken = (Token) tokenMgr.getNextToken();
62 }
63 tokenEntries.add(TokenEntry.getEOF());
64 }
65
66 private void processToken(Tokens tokenEntries, String fileName, Token currentToken) {
67 String image = currentToken.image;
68 if (ignoreLiterals
69 && (currentToken.kind == JavaParserConstants.STRING_LITERAL
70 || currentToken.kind == JavaParserConstants.CHARACTER_LITERAL
71 || currentToken.kind == JavaParserConstants.DECIMAL_LITERAL || currentToken.kind == JavaParserConstants.FLOATING_POINT_LITERAL)) {
72 image = String.valueOf(currentToken.kind);
73 }
74 if (ignoreIdentifiers && currentToken.kind == JavaParserConstants.IDENTIFIER) {
75 image = String.valueOf(currentToken.kind);
76 }
77 tokenEntries.add(new TokenEntry(image, fileName, currentToken.beginLine));
78 }
79
80 private void initDiscarders() {
81 if (ignoreAnnotations)
82 discarders.add(new AnnotationStateDiscarder());
83 discarders.add(new SuppressCPDDiscarder());
84 discarders.add(new KeyWordToSemiColonStateDiscarder(JavaParserConstants.IMPORT));
85 discarders.add(new KeyWordToSemiColonStateDiscarder(JavaParserConstants.PACKAGE));
86 }
87
88 private boolean inDiscardingState() {
89 boolean discarding = false;
90 for (Discarder discarder : discarders) {
91 if (discarder.isDiscarding())
92 discarding = true;
93 }
94 return discarding;
95 }
96
97 public void setIgnoreLiterals(boolean ignore) {
98 this.ignoreLiterals = ignore;
99 }
100
101 public void setIgnoreIdentifiers(boolean ignore) {
102 this.ignoreIdentifiers = ignore;
103 }
104
105 public void setIgnoreAnnotations(boolean ignoreAnnotations) {
106 this.ignoreAnnotations = ignoreAnnotations;
107 }
108
109 static public interface Discarder {
110 public void add(Token token);
111
112 public boolean isDiscarding();
113 }
114
115 static public class AnnotationStateDiscarder implements Discarder {
116
117 Stack<Token> tokenStack = new Stack<Token>();
118
119 public void add(Token token) {
120 if (isDiscarding() && tokenStack.size() == 2 && token.kind != JavaParserConstants.LPAREN) {
121 tokenStack.clear();
122 }
123
124 if (token.kind == JavaParserConstants.AT && !isDiscarding()) {
125 tokenStack.push(token);
126 return;
127 }
128 if (token.kind == JavaParserConstants.RPAREN && isDiscarding()) {
129 Token popped = null;
130 while ((popped = tokenStack.pop()).kind != JavaParserConstants.LPAREN) ;
131 return;
132
133 } else {
134 if (isDiscarding())
135 tokenStack.push(token);
136 }
137 }
138
139 public boolean isDiscarding() {
140 return !tokenStack.isEmpty();
141 }
142
143 }
144
145 static public class KeyWordToSemiColonStateDiscarder implements Discarder {
146
147 private final int keyword;
148 Stack<Token> tokenStack = new Stack<Token>();
149
150 public KeyWordToSemiColonStateDiscarder(int keyword) {
151 this.keyword = keyword;
152 }
153
154 public void add(Token token) {
155 if (token.kind == keyword)
156 tokenStack.add(token);
157 if (token.kind == JavaParserConstants.SEMICOLON && isDiscarding())
158 tokenStack.clear();
159 }
160
161 public boolean isDiscarding() {
162 return !tokenStack.isEmpty();
163 }
164
165 }
166
167 static public class SuppressCPDDiscarder implements Discarder {
168 AnnotationStateDiscarder asm = new AnnotationStateDiscarder();
169 Boolean discarding = false;
170
171 public void add(Token token) {
172 asm.add(token);
173
174 if (asm.isDiscarding()) {
175 if (CPD_START.equals(token.image))
176 discarding = true;
177 if (CPD_END.equals(token.image) && discarding)
178 discarding = false;
179 }
180 }
181
182 public boolean isDiscarding() {
183 return discarding;
184 }
185
186 }
187
188 }